tests/amdgpu/ras: refine ras inject test

The RAS inject test framework does not work with the original code,
so refine it to work on top of the kernel's RAS inject feature
enablement.

Signed-off-by: Dennis Li <dennis.li@amd.com>
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
main
Guchun Chen 2019-07-31 17:06:06 +08:00 committed by Alex Deucher
parent 0000162504
commit 1ef1e4db96
5 changed files with 522 additions and 57 deletions

View File

@ -430,10 +430,24 @@ if test "x$AMDGPU" != xno; then
AC_SUBST([CUNIT_CFLAGS]) AC_SUBST([CUNIT_CFLAGS])
fi fi
fi fi
# Detect json-c library
PKG_CHECK_MODULES([JSONC], [json-c >= 0.10.1], [have_jsonc=yes], [have_jsonc=no])
if test "x${have_jsonc}" = "xno"; then
AC_CHECK_LIB([json-c], [json_object_object_get], [have_jsonc=yes], [have_jsonc=no])
if test "x${have_jsonc}" = "xyes"; then
JSONC_LIBS="-ljson-c"
JSONC_CFLAGS=""
AC_SUBST([JSONC_LIBS])
AC_SUBST([JSONC_CFLAGS])
fi
fi
else else
have_cunit=no have_cunit=no
have_jsonc=no
fi fi
AM_CONDITIONAL(HAVE_CUNIT, [test "x$have_cunit" != "xno"]) AM_CONDITIONAL(HAVE_CUNIT, [test "x$have_cunit" != "xno"])
AM_CONDITIONAL(HAVE_JSONC, [test "x$have_jsonc" != "xno"])
AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes]) AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes])
if test "x$AMDGPU" = xyes; then if test "x$AMDGPU" = xyes; then
@ -442,6 +456,10 @@ if test "x$AMDGPU" = xyes; then
if test "x$have_cunit" = "xno"; then if test "x$have_cunit" = "xno"; then
AC_MSG_WARN([Could not find cunit library. Disabling amdgpu tests]) AC_MSG_WARN([Could not find cunit library. Disabling amdgpu tests])
fi fi
if test "x$have_jsonc" = "xno"; then
AC_MSG_WARN([Could not find json-c library. Disabling amdgpu tests])
fi
else else
AC_DEFINE(HAVE_AMDGPU, 0) AC_DEFINE(HAVE_AMDGPU, 0)
fi fi

View File

@ -217,6 +217,7 @@ libdrm_c_args = warn_c_args + ['-fvisibility=hidden']
dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel) dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel)
dep_cunit = dependency('cunit', version : '>= 2.1', required : false) dep_cunit = dependency('cunit', version : '>= 2.1', required : false)
dep_json = dependency('json-c', version : '>= 0.10.1', required : false)
_cairo_tests = get_option('cairo-tests') _cairo_tests = get_option('cairo-tests')
if _cairo_tests != 'false' if _cairo_tests != 'false'
dep_cairo = dependency('cairo', required : _cairo_tests == 'true') dep_cairo = dependency('cairo', required : _cairo_tests == 'true')

View File

@ -7,7 +7,8 @@ AM_CFLAGS = \
LDADD = $(top_builddir)/libdrm.la \ LDADD = $(top_builddir)/libdrm.la \
$(top_builddir)/amdgpu/libdrm_amdgpu.la \ $(top_builddir)/amdgpu/libdrm_amdgpu.la \
$(CUNIT_LIBS) $(CUNIT_LIBS) \
$(JSONC_LIBS)
if HAVE_INSTALL_TESTS if HAVE_INSTALL_TESTS
bin_PROGRAMS = \ bin_PROGRAMS = \
@ -17,7 +18,7 @@ noinst_PROGRAMS = \
amdgpu_test amdgpu_test
endif endif
amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS) amdgpu_test_CPPFLAGS = $(CUNIT_CFLAGS) $(JSONC_CFLAGS)
amdgpu_test_SOURCES = \ amdgpu_test_SOURCES = \
amdgpu_test.c \ amdgpu_test.c \

View File

@ -18,7 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.
if dep_cunit.found() if dep_cunit.found() and dep_json.found()
amdgpu_test = executable( amdgpu_test = executable(
'amdgpu_test', 'amdgpu_test',
files( files(
@ -26,9 +26,19 @@ if dep_cunit.found()
'vce_tests.c', 'uvd_enc_tests.c', 'vcn_tests.c', 'deadlock_tests.c', 'vce_tests.c', 'uvd_enc_tests.c', 'vcn_tests.c', 'deadlock_tests.c',
'vm_tests.c', 'ras_tests.c', 'syncobj_tests.c', 'vm_tests.c', 'ras_tests.c', 'syncobj_tests.c',
), ),
dependencies : [dep_cunit, dep_threads], dependencies : [dep_cunit, dep_json, dep_threads],
include_directories : [inc_root, inc_drm, include_directories('../../amdgpu')], include_directories : [inc_root, inc_drm, include_directories('../../amdgpu')],
link_with : [libdrm, libdrm_amdgpu], link_with : [libdrm, libdrm_amdgpu],
install : with_install_tests, install : with_install_tests,
) )
endif
configure_file(input : '../../data/amdgpu_ras.json',
output : 'amdgpu_ras.json',
configuration : configuration_data())
install_data(
'../../data/amdgpu_ras.json',
install_mode : 'rw-r--r--',
install_dir : datadir_amdgpu,
)
endif

View File

@ -30,6 +30,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <stdio.h> #include <stdio.h>
#include "xf86drm.h" #include "xf86drm.h"
#include "json.h"
const char *ras_block_string[] = { const char *ras_block_string[] = {
"umc", "umc",
@ -72,11 +73,252 @@ enum amdgpu_ras_block {
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
/*
 * Sub-block indices inside the GFX RAS block.
 *
 * Enumerators are numbered consecutively starting at 0. Each hardware unit
 * (CPC, CPF, CPG, GDS, SQ, SQC, TA, TCA, TCC, TCP, TD, EA) owns a contiguous
 * run of values. Its *_INDEX_START enumerator shares the value of the first
 * member of the run and its *_INDEX_END enumerator is an alias for the last
 * member, so the brackets never introduce gaps in the numbering. Units that
 * are split into several sub-ranges (SQC, TCC, EA) additionally bracket each
 * sub-range with *_INDEXn_START / *_INDEXn_END. SPI, TCI and the UTC entries
 * are single values without brackets.
 *
 * AMDGPU_RAS_BLOCK__GFX_MAX is one past the last valid sub-block index.
 *
 * NOTE(review): the order presumably has to match the kernel driver's
 * numbering of GFX sub-blocks - confirm before inserting or reordering
 * entries.
 */
enum amdgpu_ras_gfx_subblock {
/* CPC */
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
/* CPF */
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
/* CPG */
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
/* GDS */
AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
/* SPI */
AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
/* SQ */
AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
/* SQC (3 ranges) */
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
/* SQC range 0 */
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
/* SQC range 1 */
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
/* SQC range 2 */
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
/* TA */
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
/* TCA */
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
/* TCC (5 sub-ranges) */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
/* TCC range 0 */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
/* TCC range 1 */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
/* TCC range 2 */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
/* TCC range 3 */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
/* TCC range 4 */
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
/* TCI */
AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
/* TCP */
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
/* TD */
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
/* EA (3 sub-ranges) */
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
/* EA range 0 */
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
/* EA range 1 */
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
/* EA range 2 */
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
/* UTC VM L2 bank */
AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
/* UTC VM walker */
AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
/* UTC ATC L2 2MB cache */
AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
/* UTC ATC L2 4KB cache */
AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
AMDGPU_RAS_BLOCK__GFX_MAX
};
enum amdgpu_ras_error_type { enum amdgpu_ras_error_type {
AMDGPU_RAS_ERROR__NONE = 0, AMDGPU_RAS_ERROR__NONE = 0,
AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2, AMDGPU_RAS_ERROR__PARITY = 1,
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
AMDGPU_RAS_ERROR__POISON = 8, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
AMDGPU_RAS_ERROR__POISON = 8,
};
struct ras_test_item {
char name[64];
int block;
int sub_block;
char error_type_str[64];
enum amdgpu_ras_error_type type;
uint64_t address;
uint64_t value;
}; };
struct ras_common_if { struct ras_common_if {
@ -100,8 +342,10 @@ struct ras_debug_if {
int op; int op;
}; };
/* for now, only umc, gfx, sdma has implemented. */ /* for now, only umc, gfx, sdma has implemented. */
#define DEFAULT_RAS_BLOCK_MASK_INJECT (1 << AMDGPU_RAS_BLOCK__UMC) #define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
#define DEFAULT_RAS_BLOCK_MASK_QUERY (1 << AMDGPU_RAS_BLOCK__UMC) (1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
(1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\ #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
(1 << AMDGPU_RAS_BLOCK__SDMA) |\ (1 << AMDGPU_RAS_BLOCK__SDMA) |\
(1 << AMDGPU_RAS_BLOCK__GFX)) (1 << AMDGPU_RAS_BLOCK__GFX))
@ -453,6 +697,34 @@ static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
return 0; return 0;
} }
/*
 * Request injection of a single RAS error.
 *
 * @block:     RAS block to target; rejected when
 *             amdgpu_ras_is_feature_enabled() reports it as not enabled.
 * @sub_block: sub-block index inside @block.
 * @type:      kind of error to inject.
 * @address:   address handed to the inject request.
 * @value:     value handed to the inject request.
 *
 * Returns 0 when the request was accepted by amdgpu_ras_invoke(), -1 when the
 * block is not enabled or the invocation failed. A failed invocation is also
 * recorded as a CUnit assertion failure.
 */
static int amdgpu_ras_inject(enum amdgpu_ras_block block,
		uint32_t sub_block, enum amdgpu_ras_error_type type,
		uint64_t address, uint64_t value)
{
	/*
	 * .op = 2 requests an injection.
	 * NOTE(review): confirm the opcode against the kernel debug interface.
	 */
	struct ras_debug_if data = { .op = 2, };
	struct ras_inject_if *inject = &data.inject;
	int ret;

	if (amdgpu_ras_is_feature_enabled(block) <= 0) {
		fprintf(stderr, "block id(%d) is not valid\n", block);
		return -1;
	}

	inject->head.block = block;
	inject->head.type = type;
	inject->head.sub_block_index = sub_block;

	/*
	 * strncpy() does not terminate the destination when the source fills
	 * it, so bound the copy by the real field size and terminate it
	 * explicitly instead of relying on a hard-coded length.
	 */
	strncpy(inject->head.name, ras_block_str(block),
		sizeof(inject->head.name) - 1);
	inject->head.name[sizeof(inject->head.name) - 1] = '\0';

	inject->address = address;
	inject->value = value;

	ret = amdgpu_ras_invoke(&data);
	CU_ASSERT_EQUAL(ret, 0);

	return ret ? -1 : 0;
}
//tests //tests
static void amdgpu_ras_features_test(int enable) static void amdgpu_ras_features_test(int enable)
{ {
@ -503,66 +775,229 @@ static void amdgpu_ras_enable_test(void)
} }
} }
static void __amdgpu_ras_inject_test(void) static int _json_get_block_id(json_object *block_obj, const char *name)
{ {
struct ras_debug_if data; json_object *item_obj, *index_obj;
int ret;
int i;
unsigned long ue, ce, ue_old, ce_old;
data.op = 2; if (!json_object_object_get_ex(block_obj, name, &item_obj))
for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) { return -1;
int timeout = 3;
struct ras_inject_if inject = {
.head = {
.block = i,
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
.sub_block_index = 0,
.name = "",
},
.address = 0,
.value = 0,
};
if (amdgpu_ras_is_feature_enabled(i) <= 0) if (!json_object_object_get_ex(item_obj, "index", &index_obj))
continue; return -1;
if (!((1 << i) & ras_block_mask_inject)) return json_object_get_int(index_obj);
continue; }
data.inject = inject; static int _json_get_subblock_id(json_object *block_obj, const char *block_name,
const char *subblock_name)
{
json_object *item_obj, *subblock_obj, *name_obj;
ret = amdgpu_ras_query_err_count(i, &ue_old, &ce_old); if (!json_object_object_get_ex(block_obj, block_name, &item_obj))
CU_ASSERT_EQUAL(ret, 0); return -1;
if (ret) if (!json_object_object_get_ex(item_obj, "subblock", &subblock_obj))
continue; return -1;
ret = amdgpu_ras_invoke(&data); if (!json_object_object_get_ex(subblock_obj, subblock_name, &name_obj))
CU_ASSERT_EQUAL(ret, 0); return -1;
if (ret) return json_object_get_int(name_obj);
continue; }
loop: static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int *size)
while (timeout > 0) { {
ret = amdgpu_ras_query_err_count(i, &ue, &ce); json_object *root_obj = NULL;
CU_ASSERT_EQUAL(ret, 0); json_object *block_obj = NULL;
json_object *type_obj = NULL;
json_object *tests_obj = NULL;
json_object *test_obj = NULL;
json_object *tmp_obj = NULL;
json_object *tmp_type_obj = NULL;
json_object *subblock_obj = NULL;
int i, length;
struct ras_test_item *items = NULL;
int ret = -1;
if (ret) root_obj = json_object_from_file("./amdgpu_ras.json");
continue; if (!root_obj)
if (ue_old != ue) { root_obj = json_object_from_file(
/*recovery takes ~10s*/ "/usr/share/libdrm/amdgpu_ras.json");
sleep(10);
break;
}
sleep(1); if (!root_obj) {
timeout -= 1; CU_FAIL_FATAL("Couldn't find amdgpu_ras.json");
goto pro_end;
}
/* Check Version */
if (!json_object_object_get_ex(root_obj, "version", &tmp_obj)) {
CU_FAIL_FATAL("Wrong format of amdgpu_ras.json");
goto pro_end;
}
/* Block Definition */
if (!json_object_object_get_ex(root_obj, "block", &block_obj)) {
fprintf(stderr, "block isn't defined\n");
goto pro_end;
}
/* Type Definition */
if (!json_object_object_get_ex(root_obj, "type", &type_obj)) {
fprintf(stderr, "type isn't defined\n");
goto pro_end;
}
/* Enumulate test items */
if (!json_object_object_get_ex(root_obj, "tests", &tests_obj)) {
fprintf(stderr, "tests are empty\n");
goto pro_end;
}
length = json_object_array_length(tests_obj);
items = malloc(sizeof(struct ras_test_item) * length);
if (!items) {
fprintf(stderr, "malloc failed\n");
goto pro_end;
}
for (i = 0; i < length; i++) {
test_obj = json_object_array_get_idx(tests_obj, i);
/* Name */
if (!json_object_object_get_ex(test_obj, "name", &tmp_obj)) {
fprintf(stderr, "Test %d has no name\n", i);
goto pro_end;
}
strncpy(items[i].name, json_object_get_string(tmp_obj), 64);
/* block */
if (!json_object_object_get_ex(test_obj, "block", &tmp_obj)) {
fprintf(stderr, "Test:%s: block isn't defined\n",
items[i].name);
goto pro_end;
}
items[i].block = _json_get_block_id(
block_obj, json_object_get_string(tmp_obj));
/* check block id */
if (items[i].block < AMDGPU_RAS_BLOCK__UMC ||
items[i].block >= AMDGPU_RAS_BLOCK__LAST) {
fprintf(stderr, "Test:%s: block id %d is invalid\n",
items[i].name, items[i].block);
goto pro_end;
} }
CU_ASSERT_EQUAL(ue_old + 1, ue); /* subblock */
CU_ASSERT_EQUAL(ce_old, ce); if (json_object_object_get_ex(test_obj, "subblock", &tmp_obj)) {
json_object_object_get_ex(test_obj, "block",
&subblock_obj);
items[i].sub_block = _json_get_subblock_id(
block_obj,
json_object_get_string(subblock_obj),
json_object_get_string(tmp_obj));
if (items[i].sub_block < 0) {
fprintf(stderr, "Test:%s: subblock in block id %d is invalid\n",
items[i].name, items[i].block);
goto pro_end;
}
} else
items[i].sub_block = 0;
/* type */
if (json_object_object_get_ex(test_obj, "type", &tmp_obj)) {
strncpy(items[i].error_type_str,
json_object_get_string(tmp_obj), 64);
if (json_object_object_get_ex(type_obj,
json_object_get_string(tmp_obj), &tmp_type_obj))
items[i].type = json_object_get_int(tmp_type_obj);
else
items[i].type = (enum amdgpu_ras_error_type)0;
}
/* address */
if (json_object_object_get_ex(test_obj, "address", &tmp_obj))
items[i].address = json_object_get_int(tmp_obj);
else
items[i].address = 0; /* default address 0 */
/* value */
if (json_object_object_get_ex(test_obj, "value", &tmp_obj))
items[i].value = json_object_get_int(tmp_obj);
else
items[i].value = 0; /* default value 0 */
}
*pitems = items;
*size = length;
ret = 0;
pro_end:
if (root_obj)
json_object_put(root_obj);
return ret;
}
static void __amdgpu_ras_inject_test(void)
{
struct ras_test_item *items = NULL;
int i, size;
int ret;
unsigned long old_ue, old_ce;
unsigned long ue, ce;
int timeout;
bool pass;
ret = amdgpu_ras_get_test_items(&items, &size);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
goto mem_free;
printf("...\n");
for (i = 0; i < size; i++) {
timeout = 3;
pass = false;
ret = amdgpu_ras_query_err_count(items[i].block, &old_ue,
&old_ce);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
ret = amdgpu_ras_inject(items[i].block, items[i].sub_block,
items[i].type, items[i].address,
items[i].value);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
while (timeout > 0) {
sleep(5);
ret = amdgpu_ras_query_err_count(items[i].block, &ue,
&ce);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
if (old_ue != ue || old_ce != ce) {
pass = true;
sleep(20);
break;
}
timeout -= 1;
}
printf("\t Test %s@%s, address %ld, value %ld: %s\n",
items[i].name, items[i].error_type_str, items[i].address,
items[i].value, pass ? "Pass" : "Fail");
}
mem_free:
if (items) {
free(items);
items = NULL;
} }
} }