amdgpu: implement amdgpu_cs_query_reset_state
v2: also return the number of hangs

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
main
parent
d3e7195066
commit
4b39a8e7cf
|
@ -97,26 +97,6 @@ enum amdgpu_bo_handle_type {
|
|||
amdgpu_bo_handle_type_dma_buf_fd = 2
|
||||
};
|
||||
|
||||
/**
|
||||
* Enum describing possible context reset states
|
||||
*
|
||||
* \sa amdgpu_cs_query_reset_state()
|
||||
*
|
||||
*/
|
||||
enum amdgpu_cs_ctx_reset_state {
|
||||
/** No reset was detected */
|
||||
amdgpu_cs_reset_no_error = 0,
|
||||
|
||||
/** Reset/TDR was detected and this context caused it */
|
||||
amdgpu_cs_reset_guilty = 1,
|
||||
|
||||
/** Reset/TDR was detected, caused by another context */
|
||||
amdgpu_cs_reset_innocent = 2,
|
||||
|
||||
/** Reset/TDR was detected, but its cause is unknown */
|
||||
amdgpu_cs_reset_unknown = 3
|
||||
};
|
||||
|
||||
/**
|
||||
* For performance reasons and to simplify logic libdrm_amdgpu will handle
|
||||
* IBs only some pre-defined sizes.
|
||||
|
@ -920,7 +900,8 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
|
|||
* Query reset state for the specific GPU Context
|
||||
*
|
||||
* \param context - \c [in] GPU Context handle
|
||||
* \param state - \c [out] Reset state status
|
||||
* \param state - \c [out] One of AMDGPU_CTX_*_RESET
|
||||
* \param hangs - \c [out] Number of hangs caused by the context.
|
||||
*
|
||||
* \return 0 on success\n
|
||||
* >0 - AMD specific error code\n
|
||||
|
@ -930,7 +911,7 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
|
|||
*
|
||||
*/
|
||||
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
|
||||
enum amdgpu_cs_ctx_reset_state *state);
|
||||
uint32_t *state, uint32_t *hangs);
|
||||
|
||||
|
||||
/*
|
||||
|
|
|
@ -611,6 +611,27 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
|
|||
return r;
|
||||
}
|
||||
|
||||
/**
 * Query the reset state of a GPU context.
 *
 * Issues the AMDGPU_CTX_OP_QUERY_STATE operation of the DRM_AMDGPU_CTX
 * command for the given context and, on success, copies the reported reset
 * status and hang count to the caller.
 *
 * \param   context - \c [in]  GPU context handle
 * \param   state   - \c [out] Reset status reported for the context
 * \param   hangs   - \c [out] Hang count reported for the context
 *
 * \return  0 on success; -EINVAL if any argument is NULL; otherwise the
 *          value returned by drmCommandWriteRead(). The outputs are written
 *          only on success.
 */
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	/*
	 * Validate the output pointers up front as well: a NULL here would
	 * otherwise be dereferenced only after the query had been issued.
	 */
	if (!context || !state || !hangs)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (r)
		return r;

	*state = args.out.state.reset_status;
	*hangs = args.out.state.hangs;
	return 0;
}
|
||||
|
||||
static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
|
||||
{
|
||||
return ip * AMDGPU_CS_MAX_RINGS + ring;
|
||||
|
|
|
@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
|
|||
|
||||
#define AMDGPU_CTX_OP_STATE_RUNNING 1
|
||||
|
||||
/* GPU reset status */
|
||||
#define AMDGPU_CTX_NO_RESET 0
|
||||
#define AMDGPU_CTX_GUILTY_RESET 1 /* this context caused it */
|
||||
#define AMDGPU_CTX_INNOCENT_RESET 2 /* some other context caused it */
|
||||
#define AMDGPU_CTX_UNKNOWN_RESET 3 /* unknown cause */
|
||||
|
||||
struct drm_amdgpu_ctx_in {
|
||||
uint32_t op;
|
||||
uint32_t flags;
|
||||
|
@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
|
|||
|
||||
struct {
|
||||
uint64_t flags;
|
||||
uint64_t hangs;
|
||||
/** Number of resets caused by this context so far. */
|
||||
uint32_t hangs;
|
||||
/** Reset status since the last call of the ioctl. */
|
||||
uint32_t reset_status;
|
||||
} state;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue