From 6070e6a798a12eb13e4ea4a6f6dea6878b86c4f4 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Mon, 18 Jul 2022 11:41:23 -0400 Subject: [PATCH] tests/amdgpu/vcn: add unified queue support in vcn4 add unified queue headers on the existing tests. Reviewed-by: Boyuan Zhang Signed-off-by: Ruijing Dong --- tests/amdgpu/vcn_tests.c | 193 +++++++++++++++++++++++++++++++-------- 1 file changed, 153 insertions(+), 40 deletions(-) diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index e094b455..5e20fb65 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -63,6 +63,7 @@ #define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000) static bool vcn_dec_sw_ring = false; +static bool vcn_unified_ring = false; #define H264_NAL_TYPE_NON_IDR_SLICE 1 #define H264_NAL_TYPE_DP_A_SLICE 2 @@ -172,6 +173,9 @@ static amdgpu_bo_handle ib_handle; static amdgpu_va_handle ib_va_handle; static uint64_t ib_mc_address; static uint32_t *ib_cpu; +static uint32_t *ib_checksum; +static uint32_t *ib_size_in_dw; + static rvcn_decode_buffer_t *decode_buffer; static amdgpu_bo_handle resources[MAX_RESOURCES]; @@ -185,8 +189,8 @@ static struct amdgpu_vcn_reg reg[] = { }; uint32_t gWidth, gHeight, gSliceType; -struct drm_amdgpu_info_hw_ip einfo; - +static uint32_t vcn_ip_version_major; +static uint32_t vcn_ip_version_minor; static void amdgpu_cs_vcn_dec_create(void); static void amdgpu_cs_vcn_dec_decode(void); static void amdgpu_cs_vcn_dec_destroy(void); @@ -195,6 +199,8 @@ static void amdgpu_cs_vcn_enc_create(void); static void amdgpu_cs_vcn_enc_encode(void); static void amdgpu_cs_vcn_enc_destroy(void); +static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc); +static void amdgpu_cs_sq_ib_tail(uint32_t *end); static void h264_check_0s (bufferInfo * bufInfo, int count); static int32_t h264_se (bufferInfo * bufInfo); static inline uint32_t bs_read_u1(bufferInfo *bufinfo); @@ -224,7 +230,8 @@ CU_TestInfo vcn_tests[] = { CU_BOOL suite_vcn_tests_enable(void) { struct drm_amdgpu_info_hw_ip info; - int r, ret; + bool enc_ring, dec_ring; + int r; if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, &minor_version, &device_handle)) @@ -235,13 +242,31 @@ CU_BOOL suite_vcn_tests_enable(void) chip_rev = device_handle->info.chip_rev; chip_id = device_handle->info.chip_external_rev; - r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); - ret = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &einfo); + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info); + if (!r) { + vcn_ip_version_major = info.hw_ip_version_major; + vcn_ip_version_minor = info.hw_ip_version_minor; + enc_ring = !!info.available_rings; + /* in vcn 4.0 it re-uses encoding queue as unified queue */ + if (vcn_ip_version_major >= 4) { + vcn_unified_ring = true; + vcn_dec_sw_ring = true; + dec_ring = enc_ring; + } else { + r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); + dec_ring = !!info.available_rings; + } + } if (amdgpu_device_deinitialize(device_handle)) return CU_FALSE; - if (r != 0 || !info.available_rings || + if (r) { + printf("\n\nASIC query hw info failed\n"); + return CU_FALSE; + } + + if (!(dec_ring || enc_ring) || (family_id < AMDGPU_FAMILY_RV && (family_id == AMDGPU_FAMILY_AI && (chip_id - chip_rev) < 0x32))) { /* Arcturus */ @@ -249,22 +274,25 @@ CU_BOOL suite_vcn_tests_enable(void) return CU_FALSE; } - if (family_id == AMDGPU_FAMILY_AI || (ret != 0) || - (!einfo.available_rings)) { + if (!dec_ring) { + amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE); + } + + if (family_id == AMDGPU_FAMILY_AI || !enc_ring) { amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE); amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE); amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE); } - if (info.hw_ip_version_major == 1) + if (vcn_ip_version_major == 1) vcn_reg_index = 0; - else if (info.hw_ip_version_major == 2 && info.hw_ip_version_minor == 0) + else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0) vcn_reg_index = 1; - else if ((info.hw_ip_version_major == 2 && info.hw_ip_version_minor >= 5) || - info.hw_ip_version_major == 3) + else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) || + vcn_ip_version_major == 3) vcn_reg_index = 2; - else - vcn_dec_sw_ring = true; return CU_TRUE; } @@ -314,6 +342,43 @@ int suite_vcn_tests_clean(void) return CUE_SUCCESS; } +static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc) +{ + /* signature */ + *(base + (*offset)++) = 0x00000010; + *(base + (*offset)++) = 0x30000002; + ib_checksum = base + (*offset)++; + ib_size_in_dw = base + (*offset)++; + + /* engine info */ + *(base + (*offset)++) = 0x00000010; + *(base + (*offset)++) = 0x30000001; + *(base + (*offset)++) = enc ? 2 : 3; + *(base + (*offset)++) = 0x00000000; +} + +static void amdgpu_cs_sq_ib_tail(uint32_t *end) +{ + uint32_t size_in_dw; + uint32_t checksum = 0; + + /* if the pointers are invalid, no need to process */ + if (ib_checksum == NULL || ib_size_in_dw == NULL) + return; + + size_in_dw = end - ib_size_in_dw - 1; + *ib_size_in_dw = size_in_dw; + *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); + + for (int i = 0; i < size_in_dw; i++) + checksum += *(ib_checksum + 2 + i); + + *ib_checksum = checksum; + + ib_checksum = NULL; + ib_size_in_dw = NULL; +} + static int submit(unsigned ndw, unsigned ip) { struct amdgpu_cs_request ibs_request = {0}; @@ -419,10 +484,15 @@ static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) /* Support decode software ring message */ if (!(*idx)) { - rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)ib_cpu; + rvcn_decode_ib_package_t *ib_header; + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, idx, false); + + ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx]; ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s); + (*idx)++; ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER); (*idx)++; @@ -486,6 +556,7 @@ static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) static void amdgpu_cs_vcn_dec_create(void) { struct amdgpu_vcn_bo msg_buf; + unsigned ip; int len, r; num_resources = 0; @@ -500,9 +571,9 @@ static void amdgpu_cs_vcn_dec_create(void) memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg)); len = 0; - if (vcn_dec_sw_ring == true) { + if (vcn_dec_sw_ring == true) vcn_dec_cmd(msg_buf.addr, 0, &len); - } else { + else { ib_cpu[len++] = reg[vcn_reg_index].data0; ib_cpu[len++] = msg_buf.addr; ib_cpu[len++] = reg[vcn_reg_index].data1; @@ -515,7 +586,14 @@ static void amdgpu_cs_vcn_dec_create(void) } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); + CU_ASSERT_EQUAL(r, 0); free_resource(&msg_buf); @@ -527,6 +605,7 @@ static void amdgpu_cs_vcn_dec_decode(void) uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum; struct amdgpu_vcn_bo dec_buf; int size, len, i, r; + unsigned ip; uint8_t *dec; size = 4*1024; /* msg */ @@ -588,7 +667,13 @@ static void amdgpu_cs_vcn_dec_decode(void) } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); CU_ASSERT_EQUAL(r, 0); for (i = 0, sum = 0; i < dt_size; ++i) @@ -602,6 +687,7 @@ static void amdgpu_cs_vcn_dec_decode(void) static void amdgpu_cs_vcn_dec_destroy(void) { struct amdgpu_vcn_bo msg_buf; + unsigned ip; int len, r; num_resources = 0; @@ -616,9 +702,9 @@ static void amdgpu_cs_vcn_dec_destroy(void) memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg)); len = 0; - if (vcn_dec_sw_ring == true) { + if (vcn_dec_sw_ring == true) vcn_dec_cmd(msg_buf.addr, 0, &len); - } else { + else { ib_cpu[len++] = reg[vcn_reg_index].data0; ib_cpu[len++] = msg_buf.addr; ib_cpu[len++] = reg[vcn_reg_index].data1; @@ -631,7 +717,13 @@ static void amdgpu_cs_vcn_dec_destroy(void) } } - r = submit(len, AMDGPU_HW_IP_VCN_DEC); + if (vcn_unified_ring) { + amdgpu_cs_sq_ib_tail(ib_cpu + len); + ip = AMDGPU_HW_IP_VCN_ENC; + } else + ip = AMDGPU_HW_IP_VCN_DEC; + + r = submit(len, ip); CU_ASSERT_EQUAL(r, 0); free_resource(&msg_buf); @@ -646,10 +738,10 @@ static void amdgpu_cs_vcn_enc_create(void) unsigned width = 160, height = 128, buf_size; uint32_t fw_maj = 1, fw_min = 9; - if (einfo.hw_ip_version_major == 2) { + if (vcn_ip_version_major == 2) { fw_maj = 1; fw_min = 1; - } else if (einfo.hw_ip_version_major == 3) { + } else if (vcn_ip_version_major == 3) { fw_maj = 1; fw_min = 0; } @@ -675,6 +767,10 @@ static void amdgpu_cs_vcn_enc_create(void) r = amdgpu_bo_cpu_unmap(cpb_buf.handle); len = 0; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + /* session info */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ @@ -733,7 +829,7 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 1; /* quarter pel enabled */ ib_cpu[len++] = 100; /* BASELINE profile */ ib_cpu[len++] = 11; /* level */ - if (einfo.hw_ip_version_major == 3) { + if (vcn_ip_version_major == 3) { ib_cpu[len++] = 0; /* b_picture_enabled */ ib_cpu[len++] = 0; /* weighted_bipred_idc */ } @@ -774,7 +870,7 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 0; /* scene change sensitivity */ ib_cpu[len++] = 0; /* scene change min idr interval */ ib_cpu[len++] = 0; - if (einfo.hw_ip_version_major == 3) + if (vcn_ip_version_major == 3) ib_cpu[len++] = 0; *st_size = (len - st_offset) * 4; @@ -833,6 +929,9 @@ static void amdgpu_cs_vcn_enc_create(void) *p_task_size = (len - task_offset) * 4; + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + r = submit(len, AMDGPU_HW_IP_VCN_ENC); CU_ASSERT_EQUAL(r, 0); } @@ -1176,10 +1275,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) uint32_t *st_size = NULL; uint32_t fw_maj = 1, fw_min = 9; - if (einfo.hw_ip_version_major == 2) { + if (vcn_ip_version_major == 2) { fw_maj = 1; fw_min = 1; - } else if (einfo.hw_ip_version_major == 3) { + } else if (vcn_ip_version_major == 3) { fw_maj = 1; fw_min = 0; } @@ -1216,6 +1315,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) CU_ASSERT_EQUAL(r, 0); len = 0; + + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + /* session info */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ @@ -1240,7 +1343,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* sps */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ else ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */ @@ -1256,7 +1359,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* pps */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ else ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/ @@ -1270,7 +1373,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* slice header */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ else ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */ @@ -1303,7 +1406,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* encode params */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/ else ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ @@ -1324,7 +1427,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) st_offset = len; st_size = &ib_cpu[len++]; /* size */ ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ - if (einfo.hw_ip_version_major != 3) { + if (vcn_ip_version_major != 3) { ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; @@ -1353,7 +1456,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* encode context */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ else ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */ @@ -1375,7 +1478,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* bitstream buffer */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ else ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */ @@ -1389,7 +1492,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* feedback */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */ else ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */ @@ -1403,7 +1506,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) /* intra refresh */ st_offset = len; st_size = &ib_cpu[len++]; - if(einfo.hw_ip_version_major == 1) + if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */ else ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */ @@ -1412,7 +1515,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) ib_cpu[len++] = 0x00000000; *st_size = (len - st_offset) * 4; - if(einfo.hw_ip_version_major != 1) { + if(vcn_ip_version_major != 1) { /* Input Format */ st_offset = len; st_size = &ib_cpu[len++]; @@ -1449,6 +1552,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) *st_size = (len - st_offset) * 4; *p_task_size = (len - task_offset) * 4; + + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + r = submit(len, AMDGPU_HW_IP_VCN_ENC); CU_ASSERT_EQUAL(r, 0); @@ -1473,10 +1580,10 @@ static void amdgpu_cs_vcn_enc_destroy(void) uint32_t *st_size = NULL; uint32_t fw_maj = 1, fw_min = 9; - if (einfo.hw_ip_version_major == 2) { + if (vcn_ip_version_major == 2) { fw_maj = 1; fw_min = 1; - } else if (einfo.hw_ip_version_major == 3) { + } else if (vcn_ip_version_major == 3) { fw_maj = 1; fw_min = 0; } @@ -1486,6 +1593,9 @@ static void amdgpu_cs_vcn_enc_destroy(void) resources[num_resources++] = enc_buf.handle; resources[num_resources++] = ib_handle; + if (vcn_unified_ring) + amdgpu_cs_sq_head(ib_cpu, &len, true); + /* session info */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ @@ -1514,6 +1624,9 @@ static void amdgpu_cs_vcn_enc_destroy(void) *p_task_size = (len - task_offset) * 4; + if (vcn_unified_ring) + amdgpu_cs_sq_ib_tail(ib_cpu + len); + r = submit(len, AMDGPU_HW_IP_VCN_ENC); CU_ASSERT_EQUAL(r, 0);