diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index cb30c0ae..451437da 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -78,23 +78,6 @@ enum amdgpu_bo_handle_type { amdgpu_bo_handle_type_dma_buf_fd = 2 }; -/** - * For performance reasons and to simplify logic libdrm_amdgpu will handle - * IBs only some pre-defined sizes. - * - * \sa amdgpu_cs_alloc_ib() - */ -enum amdgpu_cs_ib_size { - amdgpu_cs_ib_size_4K = 0, - amdgpu_cs_ib_size_16K = 1, - amdgpu_cs_ib_size_32K = 2, - amdgpu_cs_ib_size_64K = 3, - amdgpu_cs_ib_size_128K = 4 -}; - -/** The number of different IB sizes */ -#define AMDGPU_CS_IB_SIZE_NUM 5 - /*--------------------------------------------------------------------------*/ /* -------------------------- Datatypes ----------------------------------- */ @@ -290,23 +273,6 @@ struct amdgpu_gds_alloc_info { uint32_t oa; }; -/** - * Structure to described allocated command buffer (a.k.a. IB) - * - * \sa amdgpu_cs_alloc_ib() - * -*/ -struct amdgpu_cs_ib_alloc_result { - /** IB allocation handle */ - amdgpu_bo_handle handle; - - /** Assigned GPU VM MC Address of command buffer */ - uint64_t mc_address; - - /** Address to be used for CPU access */ - void *cpu; -}; - /** * Structure describing IB * @@ -923,42 +889,6 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context, * */ - -/** - * Allocate memory to be filled with PM4 packets and be served as the first - * entry point of execution (a.k.a. Indirect Buffer) - * - * \param context - \c [in] GPU Context which will use IB - * \param ib_size - \c [in] Size of allocation - * \param output - \c [out] Pointer to structure to get information about - * allocated IB - * - * \return 0 on success\n - * >0 - AMD specific error code\n - * <0 - Negative POSIX Error code - * - * \sa amdgpu_cs_free_ib() - * -*/ -int amdgpu_cs_alloc_ib(amdgpu_context_handle context, - enum amdgpu_cs_ib_size ib_size, - struct amdgpu_cs_ib_alloc_result *output); - -/** - * If UMD has allocates IBs which doesn’t need any more than those IBs must - * be explicitly freed - * - * \param handle - \c [in] IB handle - * - * \return 0 on success\n - * >0 - AMD specific error code\n - * <0 - Negative POSIX Error code - * - * \sa amdgpu_cs_alloc_ib() - * -*/ -int amdgpu_cs_free_ib(amdgpu_bo_handle handle); - /** * Send request to submit command buffers to hardware. * diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c index 1429f26b..2a039d6d 100644 --- a/amdgpu/amdgpu_cs.c +++ b/amdgpu/amdgpu_cs.c @@ -32,94 +32,6 @@ #include "amdgpu_drm.h" #include "amdgpu_internal.h" -/** - * Create an IB buffer. - * - * \param dev - \c [in] Device handle - * \param context - \c [in] GPU Context - * \param ib_size - \c [in] Size of allocation - * \param ib - \c [out] return the pointer to the created IB buffer - * - * \return 0 on success otherwise POSIX Error code -*/ -int amdgpu_cs_alloc_ib(amdgpu_context_handle context, - enum amdgpu_cs_ib_size ib_size, - struct amdgpu_cs_ib_alloc_result *output) -{ - struct amdgpu_bo_alloc_request alloc_buffer = {}; - struct amdgpu_bo_alloc_result info; - int r; - void *cpu; - - if (NULL == context) - return -EINVAL; - if (NULL == output) - return -EINVAL; - if (ib_size >= AMDGPU_CS_IB_SIZE_NUM) - return -EINVAL; - - switch (ib_size) { - case amdgpu_cs_ib_size_4K: - alloc_buffer.alloc_size = 4 * 1024; - break; - case amdgpu_cs_ib_size_16K: - alloc_buffer.alloc_size = 16 * 1024; - break; - case amdgpu_cs_ib_size_32K: - alloc_buffer.alloc_size = 32 * 1024; - break; - case amdgpu_cs_ib_size_64K: - alloc_buffer.alloc_size = 64 * 1024; - break; - case amdgpu_cs_ib_size_128K: - alloc_buffer.alloc_size = 128 * 1024; - break; - default: - return -EINVAL; - } - - alloc_buffer.phys_alignment = 4 * 1024; - alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; - - r = amdgpu_bo_alloc(context->dev, - &alloc_buffer, - &info); - if (r) - return r; - - r = amdgpu_bo_cpu_map(info.buf_handle, &cpu); - if (r) { - amdgpu_bo_free(info.buf_handle); - return r; - } - - output->handle = info.buf_handle; - output->cpu = cpu; - output->mc_address = info.virtual_mc_base_address; - return 0; -} - -/** - * Destroy an IB buffer. - * - * \param ib - \c [in] IB handle - * - * \return 0 on success otherwise POSIX Error code -*/ -int amdgpu_cs_free_ib(amdgpu_bo_handle bo) -{ - int r; - - if (!bo) - return -EINVAL; - - r = amdgpu_bo_cpu_unmap(bo); - if (r) - return r; - - return amdgpu_bo_free(bo); -} - /** * Create command submission context * diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h index 0062bd7c..d97cb91a 100644 --- a/tests/amdgpu/amdgpu_test.h +++ b/tests/amdgpu/amdgpu_test.h @@ -131,4 +131,33 @@ static inline amdgpu_bo_handle gpu_mem_alloc( return res.buf_handle; } +static inline int +amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size, + unsigned alignment, unsigned heap, uint64_t flags, + amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address) +{ + struct amdgpu_bo_alloc_request request = {}; + struct amdgpu_bo_alloc_result out; + int r; + + request.alloc_size = size; + request.phys_alignment = alignment; + request.preferred_heap = heap; + request.flags = flags; + + r = amdgpu_bo_alloc(dev, &request, &out); + if (r) + return r; + + r = amdgpu_bo_cpu_map(out.buf_handle, cpu); + if (r) { + amdgpu_bo_free(out.buf_handle); + return r; + } + + *bo = out.buf_handle; + *mc_address = out.virtual_mc_base_address; + return 0; +} + #endif /* #ifdef _AMDGPU_TEST_H_ */ diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 66847b5f..1afcdac6 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -156,8 +156,9 @@ static void amdgpu_memory_alloc(void) static void amdgpu_command_submission_gfx_separate_ibs(void) { amdgpu_context_handle context_handle; - struct amdgpu_cs_ib_alloc_result ib_result = {0}; - struct amdgpu_cs_ib_alloc_result ib_result_ce = {0}; + amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; + void *ib_result_cpu, *ib_result_ce_cpu; + uint64_t ib_result_mc_address, ib_result_ce_mc_address; struct amdgpu_cs_request ibs_request = {0}; struct amdgpu_cs_ib_info ib_info[2]; struct amdgpu_cs_query_fence fence_status = {0}; @@ -168,31 +169,35 @@ static void amdgpu_command_submission_gfx_separate_ibs(void) r = amdgpu_cs_ctx_create(device_handle, &context_handle); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_alloc_ib(context_handle, - amdgpu_cs_ib_size_4K, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_alloc_ib(context_handle, - amdgpu_cs_ib_size_4K, &ib_result_ce); + r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_ce_handle, &ib_result_ce_cpu, + &ib_result_ce_mc_address); CU_ASSERT_EQUAL(r, 0); memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); /* IT_SET_CE_DE_COUNTERS */ - ptr = ib_result_ce.cpu; + ptr = ib_result_ce_cpu; ptr[0] = 0xc0008900; ptr[1] = 0; ptr[2] = 0xc0008400; ptr[3] = 1; - ib_info[0].bo_handle = ib_result_ce.handle; + ib_info[0].bo_handle = ib_result_ce_handle; ib_info[0].size = 4; ib_info[0].flags = AMDGPU_IB_FLAG_CE; /* IT_WAIT_ON_CE_COUNTER */ - ptr = ib_result.cpu; + ptr = ib_result_cpu; ptr[0] = 0xc0008600; ptr[1] = 0x00000001; - ib_info[1].bo_handle = ib_result.handle; + ib_info[1].bo_handle = ib_result_handle; ib_info[1].size = 2; ibs_request.ip_type = AMDGPU_HW_IP_GFX; @@ -210,10 +215,10 @@ static void amdgpu_command_submission_gfx_separate_ibs(void) r = amdgpu_cs_query_fence_status(&fence_status, &expired); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_free_ib(ib_result.handle); + r = amdgpu_bo_free(ib_result_handle); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_free_ib(ib_result_ce.handle); + r = amdgpu_bo_free(ib_result_ce_handle); CU_ASSERT_EQUAL(r, 0); r = amdgpu_cs_ctx_free(context_handle); @@ -223,7 +228,9 @@ static void amdgpu_command_submission_gfx_separate_ibs(void) static void amdgpu_command_submission_gfx_shared_ib(void) { amdgpu_context_handle context_handle; - struct amdgpu_cs_ib_alloc_result ib_result = {0}; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; struct amdgpu_cs_request ibs_request = {0}; struct amdgpu_cs_ib_info ib_info[2]; struct amdgpu_cs_query_fence fence_status = {0}; @@ -234,26 +241,28 @@ static void amdgpu_command_submission_gfx_shared_ib(void) r = amdgpu_cs_ctx_create(device_handle, &context_handle); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_alloc_ib(context_handle, - amdgpu_cs_ib_size_4K, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address); CU_ASSERT_EQUAL(r, 0); memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); /* IT_SET_CE_DE_COUNTERS */ - ptr = ib_result.cpu; + ptr = ib_result_cpu; ptr[0] = 0xc0008900; ptr[1] = 0; ptr[2] = 0xc0008400; ptr[3] = 1; - ib_info[0].bo_handle = ib_result.handle; + ib_info[0].bo_handle = ib_result_handle; ib_info[0].size = 4; ib_info[0].flags = AMDGPU_IB_FLAG_CE; - ptr = (uint32_t *)ib_result.cpu + 4; + ptr = (uint32_t *)ib_result_cpu + 4; ptr[0] = 0xc0008600; ptr[1] = 0x00000001; - ib_info[1].bo_handle = ib_result.handle; + ib_info[1].bo_handle = ib_result_handle; ib_info[1].size = 2; ib_info[1].offset_dw = 4; @@ -272,7 +281,7 @@ static void amdgpu_command_submission_gfx_shared_ib(void) r = amdgpu_cs_query_fence_status(&fence_status, &expired); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_free_ib(ib_result.handle); + r = amdgpu_bo_free(ib_result_handle); CU_ASSERT_EQUAL(r, 0); r = amdgpu_cs_ctx_free(context_handle); @@ -290,7 +299,9 @@ static void amdgpu_command_submission_gfx(void) static void amdgpu_command_submission_compute(void) { amdgpu_context_handle context_handle; - struct amdgpu_cs_ib_alloc_result ib_result; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; struct amdgpu_cs_request ibs_request; struct amdgpu_cs_ib_info ib_info; struct amdgpu_cs_query_fence fence_status; @@ -302,17 +313,18 @@ static void amdgpu_command_submission_compute(void) CU_ASSERT_EQUAL(r, 0); for (instance = 0; instance < 8; instance++) { - memset(&ib_result, 0, sizeof(struct amdgpu_cs_ib_alloc_result)); - r = amdgpu_cs_alloc_ib(context_handle, - amdgpu_cs_ib_size_4K, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address); CU_ASSERT_EQUAL(r, 0); - ptr = ib_result.cpu; + ptr = ib_result_cpu; for (i = 0; i < 16; ++i) ptr[i] = 0xffff1000; memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); - ib_info.bo_handle = ib_result.handle; + ib_info.bo_handle = ib_result_handle; ib_info.size = 16; memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); @@ -334,7 +346,7 @@ static void amdgpu_command_submission_compute(void) r = amdgpu_cs_query_fence_status(&fence_status, &expired); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_cs_free_ib(ib_result.handle); + r = amdgpu_bo_free(ib_result_handle); CU_ASSERT_EQUAL(r, 0); } @@ -356,7 +368,9 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle, int r, i, j; uint32_t expired; uint32_t *ring_ptr; - struct amdgpu_cs_ib_alloc_result ib_result = {0}; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; struct amdgpu_cs_query_fence fence_status = {0}; /* prepare CS */ @@ -367,15 +381,17 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle, CU_ASSERT_TRUE(pm4_dw <= 1024); /* allocate IB */ - r = amdgpu_cs_alloc_ib(context_handle, - amdgpu_cs_ib_size_4K, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address); CU_ASSERT_EQUAL(r, 0); /* copy PM4 packet to ring from caller */ - ring_ptr = ib_result.cpu; + ring_ptr = ib_result_cpu; memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); - ib_info->bo_handle = ib_result.handle; + ib_info->bo_handle = ib_result_handle; ib_info->size = pm4_dw; ibs_request->ip_type = AMDGPU_HW_IP_DMA; @@ -407,7 +423,7 @@ static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle, CU_ASSERT_EQUAL(r, 0); CU_ASSERT_EQUAL(expired, true); - r = amdgpu_cs_free_ib(ib_result.handle); + r = amdgpu_bo_free(ib_result_handle); CU_ASSERT_EQUAL(r, 0); } diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c index 81d5e58e..6e76dcc1 100644 --- a/tests/amdgpu/cs_tests.c +++ b/tests/amdgpu/cs_tests.c @@ -31,7 +31,7 @@ #include "amdgpu_drm.h" #include "amdgpu_internal.h" -#define IB_SIZE amdgpu_cs_ib_size_4K +#define IB_SIZE 4096 #define MAX_RESOURCES 16 static amdgpu_device_handle device_handle; @@ -59,7 +59,9 @@ CU_TestInfo cs_tests[] = { int suite_cs_tests_init(void) { - struct amdgpu_cs_ib_alloc_result ib_result = {0}; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; int r; r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, @@ -73,12 +75,15 @@ int suite_cs_tests_init(void) if (r) return CUE_SINIT_FAILED; - r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address); if (r) return CUE_SINIT_FAILED; - ib_handle = ib_result.handle; - ib_cpu = ib_result.cpu; + ib_handle = ib_result_handle; + ib_cpu = ib_result_cpu; return CUE_SUCCESS; } @@ -87,7 +92,7 @@ int suite_cs_tests_clean(void) { int r; - r = amdgpu_cs_free_ib(ib_handle); + r = amdgpu_bo_free(ib_handle); if (r) return CUE_SCLEAN_FAILED; @@ -104,7 +109,6 @@ int suite_cs_tests_clean(void) static int submit(unsigned ndw, unsigned ip) { - struct amdgpu_cs_ib_alloc_result ib_result = {0}; struct amdgpu_cs_request ibs_request = {0}; struct amdgpu_cs_ib_info ib_info = {0}; struct amdgpu_cs_query_fence fence_status = {0}; diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c index 99aebc97..45cfae2a 100644 --- a/tests/amdgpu/vce_tests.c +++ b/tests/amdgpu/vce_tests.c @@ -35,7 +35,7 @@ #include "vce_ib.h" #include "frame.h" -#define IB_SIZE amdgpu_cs_ib_size_4K +#define IB_SIZE 4096 #define MAX_RESOURCES 16 struct amdgpu_vce_bo { @@ -64,9 +64,9 @@ static amdgpu_context_handle context_handle; static amdgpu_bo_handle ib_handle; uint32_t *ib_cpu; -struct amdgpu_vce_encode enc; -amdgpu_bo_handle resources[MAX_RESOURCES]; -unsigned num_resources; +static struct amdgpu_vce_encode enc; +static amdgpu_bo_handle resources[MAX_RESOURCES]; +static unsigned num_resources; static void amdgpu_cs_vce_create(void); static void amdgpu_cs_vce_encode(void); @@ -81,7 +81,6 @@ CU_TestInfo vce_tests[] = { int suite_vce_tests_init(void) { - struct amdgpu_cs_ib_alloc_result ib_result = {0}; int r; r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, @@ -95,13 +94,13 @@ int suite_vce_tests_init(void) if (r) return CUE_SINIT_FAILED; - r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_handle, (void**)&ib_cpu, + &ib_mc_address); if (r) return CUE_SINIT_FAILED; - ib_handle = ib_result.handle; - ib_cpu = ib_result.cpu; - memset(&enc, 0, sizeof(struct amdgpu_vce_encode)); return CUE_SUCCESS; @@ -111,7 +110,7 @@ int suite_vce_tests_clean(void) { int r; - r = amdgpu_cs_free_ib(ib_handle); + r = amdgpu_bo_free(ib_handle); if (r) return CUE_SCLEAN_FAILED; @@ -128,7 +127,6 @@ int suite_vce_tests_clean(void) static int submit(unsigned ndw, unsigned ip) { - struct amdgpu_cs_ib_alloc_result ib_result = {0}; struct amdgpu_cs_request ibs_request = {0}; struct amdgpu_cs_ib_info ib_info = {0}; struct amdgpu_cs_query_fence fence_status = {0}; @@ -157,13 +155,13 @@ static int submit(unsigned ndw, unsigned ip) if (r) return r; - r = amdgpu_cs_alloc_ib(context_handle, IB_SIZE, &ib_result); + r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_handle, (void**)&ib_cpu, + &ib_mc_address); if (r) return r; - ib_handle = ib_result.handle; - ib_cpu = ib_result.cpu; - fence_status.context = context_handle; fence_status.timeout_ns = AMDGPU_TIMEOUT_INFINITE; fence_status.ip_type = ip; @@ -374,7 +372,6 @@ static void amdgpu_cs_vce_encode(void) vbuf_size = enc.width * enc.height * 1.5; cpb_size = vbuf_size * 10; - num_resources = 0; alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc.fb[0].handle;