amdgpu: implement amdgpu_cs_query_reset_state

v2: also return the number of hangs

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
main
Marek Olšák 2015-05-05 21:23:02 +02:00 committed by Alex Deucher
parent d3e7195066
commit 4b39a8e7cf
3 changed files with 34 additions and 23 deletions

View File

@ -97,26 +97,6 @@ enum amdgpu_bo_handle_type {
amdgpu_bo_handle_type_dma_buf_fd = 2 amdgpu_bo_handle_type_dma_buf_fd = 2
}; };
/**
* Enum describing the possible reset states of a GPU context.
*
* \sa amdgpu_cs_query_reset_state()
*
*/
enum amdgpu_cs_ctx_reset_state {
/** No reset was detected. */
amdgpu_cs_reset_no_error = 0,
/** A reset/TDR was detected and this context caused it. */
amdgpu_cs_reset_guilty = 1,
/** A reset/TDR was detected and another context caused it. */
amdgpu_cs_reset_innocent = 2,
/** A reset/TDR was detected but its cause is unknown. */
amdgpu_cs_reset_unknown = 3
};
/** /**
* For performance reasons and to simplify logic libdrm_amdgpu will handle * For performance reasons and to simplify logic libdrm_amdgpu will handle
* IBs only some pre-defined sizes. * IBs only some pre-defined sizes.
@ -920,7 +900,8 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
* Query reset state for the specific GPU Context * Query reset state for the specific GPU Context
* *
* \param context - \c [in] GPU Context handle * \param context - \c [in] GPU Context handle
* \param state - \c [out] Reset state status * \param state - \c [out] One of AMDGPU_CTX_*_RESET
* \param hangs - \c [out] Number of hangs caused by the context.
* *
* \return 0 on success\n * \return 0 on success\n
* >0 - AMD specific error code\n * >0 - AMD specific error code\n
@ -930,7 +911,7 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
* *
*/ */
int amdgpu_cs_query_reset_state(amdgpu_context_handle context, int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
enum amdgpu_cs_ctx_reset_state *state); uint32_t *state, uint32_t *hangs);
/* /*

View File

@ -611,6 +611,27 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
return r; return r;
} }
/**
 * Query the reset state of a GPU context.
 *
 * Issues the DRM_AMDGPU_CTX ioctl with the AMDGPU_CTX_OP_QUERY_STATE
 * operation for the given context and, on success, copies the reported
 * reset status and hang count to the caller.
 *
 * \param context - \c [in]  GPU context handle
 * \param state   - \c [out] Reset status reported by the ioctl
 * \param hangs   - \c [out] Hang count reported by the ioctl
 *
 * \return 0 on success\n
 *         -EINVAL if \p context, \p state or \p hangs is NULL\n
 *         otherwise the value returned by drmCommandWriteRead()
 */
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	/*
	 * Validate the output pointers together with the context, before the
	 * ioctl is issued: a query whose result cannot be stored is refused
	 * instead of crashing on the write-back below.
	 */
	if (!context || !state || !hangs)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (r)
		return r;

	/* Outputs are written only on success; on failure they stay untouched. */
	*state = args.out.state.reset_status;
	*hangs = args.out.state.hangs;
	return 0;
}
static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring) static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
{ {
return ip * AMDGPU_CS_MAX_RINGS + ring; return ip * AMDGPU_CS_MAX_RINGS + ring;

View File

@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
#define AMDGPU_CTX_OP_STATE_RUNNING 1 #define AMDGPU_CTX_OP_STATE_RUNNING 1
/* GPU reset status */
#define AMDGPU_CTX_NO_RESET 0
#define AMDGPU_CTX_GUILTY_RESET 1 /* this context caused it */
#define AMDGPU_CTX_INNOCENT_RESET 2 /* some other context caused it */
#define AMDGPU_CTX_UNKNOWN_RESET 3 /* unknown cause */
struct drm_amdgpu_ctx_in { struct drm_amdgpu_ctx_in {
uint32_t op; uint32_t op;
uint32_t flags; uint32_t flags;
@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
struct { struct {
uint64_t flags; uint64_t flags;
uint64_t hangs; /** Number of resets caused by this context so far. */
uint32_t hangs;
/** Reset status since the last call of the ioctl. */
uint32_t reset_status;
} state; } state;
}; };