From 4b39a8e7cfff20a9cf8512541b9fc764b42da974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 5 May 2015 21:23:02 +0200
Subject: [PATCH] amdgpu: implement amdgpu_cs_query_reset_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: also return the number of hangs

Reviewed-by: Christian König
Reviewed-by: Jammy Zhou
---
Reviewer notes (placed after the "---" marker, so they are not part of the
commit message):
 - amdgpu_cs_query_reset_state() returns -EINVAL for a NULL context;
   otherwise it zeroes a drm_amdgpu_ctx request, sets op to
   AMDGPU_CTX_OP_QUERY_STATE and ctx_id to context->id, and returns the
   result of drmCommandWriteRead() on DRM_AMDGPU_CTX.
 - *state and *hangs are written only when that call returns 0; on any
   other return value the outputs are left untouched.
 - NOTE(review): only `context` is NULL-checked. `state` and `hangs` are
   dereferenced unconditionally on success; confirm callers never pass NULL.
 - NOTE(review): the prototype changes from
   `enum amdgpu_cs_ctx_reset_state *state` to
   `uint32_t *state, uint32_t *hangs` and the enum is removed, so existing
   callers of the old declaration need updating.
 - drm_amdgpu_ctx_out.state: the 64-bit `hangs` field is replaced by a
   32-bit `hangs` followed by a 32-bit `reset_status`.

 amdgpu/amdgpu.h          | 25 +++----------------------
 amdgpu/amdgpu_cs.c       | 21 +++++++++++++++++++++
 include/drm/amdgpu_drm.h | 11 ++++++++++-
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 0997bd73..c48af922 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -97,26 +97,6 @@ enum amdgpu_bo_handle_type {
 	amdgpu_bo_handle_type_dma_buf_fd = 2
 };
 
-/**
- * Enum describing possible context reset states
- *
- * \sa amdgpu_cs_query_reset_state()
- *
-*/
-enum amdgpu_cs_ctx_reset_state {
-	/** No reset was detected */
-	amdgpu_cs_reset_no_error = 0,
-
-	/** Reset/TDR was detected and context caused */
-	amdgpu_cs_reset_guilty = 1,
-
-	/** Reset/TDR was detected caused by other context */
-	amdgpu_cs_reset_innocent = 2,
-
-	/** Reset TDR was detected by cause of it unknown */
-	amdgpu_cs_reset_unknown = 3
-};
-
 /**
  * For performance reasons and to simplify logic libdrm_amdgpu will handle
  * IBs only some pre-defined sizes.
@@ -920,7 +900,8 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
  * Query reset state for the specific GPU Context
  *
  * \param   context - \c [in]  GPU Context handle
- * \param   state   - \c [out] Reset state status
+ * \param   state   - \c [out] One of AMDGPU_CTX_*_RESET
+ * \param   hangs   - \c [out] Number of hangs caused by the context.
  *
  * \return   0 on success\n
  *          >0 - AMD specific error code\n
@@ -930,7 +911,7 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context);
  *
 */
 int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
-				enum amdgpu_cs_ctx_reset_state *state);
+				uint32_t *state, uint32_t *hangs);
 
 
 /*
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index ef3e403b..c8101b85 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -611,6 +611,27 @@ int amdgpu_cs_ctx_free(amdgpu_context_handle context)
 	return r;
 }
 
+int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
+				uint32_t *state, uint32_t *hangs)
+{
+	union drm_amdgpu_ctx args;
+	int r;
+
+	if (!context)
+		return -EINVAL;
+
+	memset(&args, 0, sizeof(args));
+	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
+	args.in.ctx_id = context->id;
+	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
+				&args, sizeof(args));
+	if (!r) {
+		*state = args.out.state.reset_status;
+		*hangs = args.out.state.hangs;
+	}
+	return r;
+}
+
 static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
 {
 	return ip * AMDGPU_CS_MAX_RINGS + ring;
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index f61ec0cc..81b495ed 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
 
 #define AMDGPU_CTX_OP_STATE_RUNNING	1
 
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET		0
+#define AMDGPU_CTX_GUILTY_RESET		1 /* this the context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET	2 /* some other context caused it */
+#define AMDGPU_CTX_UNKNOWN_RESET	3 /* unknown cause */
+
 struct drm_amdgpu_ctx_in {
 	uint32_t	op;
 	uint32_t	flags;
@@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
 
 		struct {
 			uint64_t	flags;
-			uint64_t	hangs;
+			/** Number of resets caused by this context so far. */
+			uint32_t	hangs;
+			/** Reset status since the last call of the ioctl. */
+			uint32_t	reset_status;
 		} state;
 };
 