amdgpu: implement amdgpu_cs_query_reset_state
v2: also return the number of hangs
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
main
parent
d3e7195066
commit
4b39a8e7cf
|
@ -97,26 +97,6 @@ enum amdgpu_bo_handle_type {
|
||||||
amdgpu_bo_handle_type_dma_buf_fd = 2
|
amdgpu_bo_handle_type_dma_buf_fd = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Enum describing possible context reset states
|
|
||||||
*
|
|
||||||
* \sa amdgpu_cs_query_reset_state()
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
enum amdgpu_cs_ctx_reset_state {
|
|
||||||
/** No reset was detected */
|
|
||||||
amdgpu_cs_reset_no_error = 0,
|
|
||||||
|
|
||||||
/** Reset/TDR was detected and this context caused it */
|
|
||||||
amdgpu_cs_reset_guilty = 1,
|
|
||||||
|
|
||||||
/** Reset/TDR was detected, caused by another context */
|
|
||||||
amdgpu_cs_reset_innocent = 2,
|
|
||||||
|
|
||||||
/** Reset/TDR was detected but the cause of it is unknown */
|
|
||||||
amdgpu_cs_reset_unknown = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For performance reasons and to simplify logic libdrm_amdgpu will handle
|
* For performance reasons and to simplify logic libdrm_amdgpu will handle
|
||||||
* IBs only of some pre-defined sizes.
|
* IBs only of some pre-defined sizes.
|
||||||
|
@ -920,7 +900,8 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
|
||||||
* Query reset state for the specific GPU Context
|
* Query reset state for the specific GPU Context
|
||||||
*
|
*
|
||||||
* \param context - \c [in] GPU Context handle
|
* \param context - \c [in] GPU Context handle
|
||||||
* \param state - \c [out] Reset state status
|
* \param state - \c [out] One of AMDGPU_CTX_*_RESET
|
||||||
|
* \param hangs - \c [out] Number of hangs caused by the context.
|
||||||
*
|
*
|
||||||
* \return 0 on success\n
|
* \return 0 on success\n
|
||||||
* >0 - AMD specific error code\n
|
* >0 - AMD specific error code\n
|
||||||
|
@ -930,7 +911,7 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
|
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
|
||||||
enum amdgpu_cs_ctx_reset_state *state);
|
uint32_t *state, uint32_t *hangs);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -611,6 +611,27 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
|
||||||
|
uint32_t *state, uint32_t *hangs)
|
||||||
|
{
|
||||||
|
union drm_amdgpu_ctx args;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (!context)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
memset(&args, 0, sizeof(args));
|
||||||
|
args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
|
||||||
|
args.in.ctx_id = context->id;
|
||||||
|
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
|
||||||
|
&args, sizeof(args));
|
||||||
|
if (!r) {
|
||||||
|
*state = args.out.state.reset_status;
|
||||||
|
*hangs = args.out.state.hangs;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
|
static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
|
||||||
{
|
{
|
||||||
return ip * AMDGPU_CS_MAX_RINGS + ring;
|
return ip * AMDGPU_CS_MAX_RINGS + ring;
|
||||||
|
|
|
@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
|
||||||
|
|
||||||
#define AMDGPU_CTX_OP_STATE_RUNNING 1
|
#define AMDGPU_CTX_OP_STATE_RUNNING 1
|
||||||
|
|
||||||
|
/* GPU reset status */
|
||||||
|
#define AMDGPU_CTX_NO_RESET 0
|
||||||
|
#define AMDGPU_CTX_GUILTY_RESET 1 /* this context caused it */
|
||||||
|
#define AMDGPU_CTX_INNOCENT_RESET 2 /* some other context caused it */
|
||||||
|
#define AMDGPU_CTX_UNKNOWN_RESET 3 /* unknown cause */
|
||||||
|
|
||||||
struct drm_amdgpu_ctx_in {
|
struct drm_amdgpu_ctx_in {
|
||||||
uint32_t op;
|
uint32_t op;
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
|
@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
uint64_t flags;
|
uint64_t flags;
|
||||||
uint64_t hangs;
|
/** Number of resets caused by this context so far. */
|
||||||
|
uint32_t hangs;
|
||||||
|
/** Reset status since the last call of the ioctl. */
|
||||||
|
uint32_t reset_status;
|
||||||
} state;
|
} state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue