diff --git a/tests/amdgpu/amdgpu_stress.c b/tests/amdgpu/amdgpu_stress.c
new file mode 100644
index 00000000..5c5c88c5
--- /dev/null
+++ b/tests/amdgpu/amdgpu_stress.c
@@ -0,0 +1,545 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <time.h>
+
+#include "drm.h"
+#include "xf86drmMode.h"
+#include "xf86drm.h"
+#include "amdgpu.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+#define MAX_CARDS_SUPPORTED 4
+#define NUM_BUFFER_OBJECTS 1024
+
+/* Size in bytes of BO 0, the GTT buffer that holds the IB. */
+#define IB_SIZE_BYTES (2ULL * 1024 * 1024)
+/* Largest number of bytes moved by one SDMA copy packet. */
+#define MAX_COPY_BYTES 0x40000ULL
+
+#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
+					 (((sub_op) & 0xFF) << 8) |	\
+					 (((op) & 0xFF) << 0))
+
+#define SDMA_OPCODE_COPY 1
+# define SDMA_COPY_SUB_OPCODE_LINEAR 0
+
+#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
+						 (((b) & 0x1) << 26) |	\
+						 (((t) & 0x1) << 23) |	\
+						 (((s) & 0x1) << 22) |	\
+						 (((cnt) & 0xFFFFF) << 0))
+#define SDMA_OPCODE_COPY_SI 3
+
+/** Help string for command line parameters */
+static const char usage[] =
+	"Usage: %s [-?h] [-b v|g|vg size] "
+	"[-c from to size count]\n"
+	"where:\n"
+	" b - Allocate a BO in VRAM, GTT or VRAM|GTT of size bytes.\n"
+	" This flag can be used multiple times. The first bo will\n"
+	" have id `1`, then second id `2`, ...\n"
+	" c - Copy size bytes from BO (bo_id1) to BO (bo_id2), count times\n"
+	" h - Display this help\n"
+	"\n"
+	"Sizes can be postfixes with k, m or g for kilo, mega and gigabyte scaling\n";
+
+/** Specified options strings for getopt */
+static const char options[] = "?hb:c:";
+
+/**
+ * Open the first usable AMD GPU render node.
+ *
+ * Enumerates up to MAX_CARDS_SUPPORTED DRM devices and picks the first PCI
+ * device with the AMD vendor ID whose render node can be opened and whose
+ * kernel driver reports the name "amdgpu".
+ *
+ * Returns the open file descriptor on success, the negative value reported
+ * by drmGetDevices2() if enumeration fails, or -1 if no device matched.
+ */
+static int amdgpu_open_device(void)
+{
+	drmDevicePtr devices[MAX_CARDS_SUPPORTED];
+	int drm_count;
+	int fd = -1;
+	int i;
+
+	drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
+	if (drm_count < 0) {
+		fprintf(stderr, "drmGetDevices2() returned an error %d\n",
+			drm_count);
+		return drm_count;
+	}
+
+	for (i = 0; i < drm_count; i++) {
+		drmVersionPtr version;
+		int is_amdgpu;
+
+		/* If this is not PCI device, skip*/
+		if (devices[i]->bustype != DRM_BUS_PCI)
+			continue;
+
+		/* If this is not AMD GPU vendor ID, skip*/
+		if (devices[i]->deviceinfo.pci->vendor_id != 0x1002)
+			continue;
+
+		if (!(devices[i]->available_nodes & 1 << DRM_NODE_RENDER))
+			continue;
+
+		fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
+
+		/* This node is not available. */
+		if (fd < 0)
+			continue;
+
+		version = drmGetVersion(fd);
+		if (!version) {
+			fprintf(stderr,
+				"Warning: Cannot get version for %s."
+				"Error is %s\n",
+				devices[i]->nodes[DRM_NODE_RENDER],
+				strerror(errno));
+			close(fd);
+			fd = -1;
+			continue;
+		}
+
+		is_amdgpu = !strcmp(version->name, "amdgpu");
+		drmFreeVersion(version);
+		if (is_amdgpu)
+			break;
+
+		/* This is not AMDGPU driver, skip.*/
+		close(fd);
+		fd = -1;
+	}
+
+	/* Release the device list on every path, not only on success. */
+	drmFreeDevices(devices, drm_count);
+	return fd;
+}
+
+/* Device and submission context shared by all helpers below. */
+amdgpu_device_handle device_handle;
+amdgpu_context_handle context_handle;
+
+/* Allocated BOs and their GPU addresses; only the first num_buffers are set. */
+amdgpu_bo_handle resources[NUM_BUFFER_OBJECTS];
+uint64_t virtual[NUM_BUFFER_OBJECTS];
+unsigned int num_buffers;
+/* CPU mapping of BO 0, which holds the IB. */
+uint32_t *pm4;
+
+/**
+ * Allocate a buffer object and map it into the GPU virtual address space.
+ *
+ * @param domain  AMDGPU_GEM_DOMAIN_* mask used as preferred heap.
+ * @param size    Size of the buffer in bytes.
+ *
+ * On success the BO handle and its GPU address are stored in the next free
+ * slot of resources[]/virtual[] and num_buffers is incremented.
+ *
+ * Returns 0 on success, -ENOSPC when all NUM_BUFFER_OBJECTS slots are in
+ * use, or the error code of the failing libdrm call. Whatever was acquired
+ * before a failure is released again.
+ */
+int alloc_bo(uint32_t domain, uint64_t size)
+{
+	struct amdgpu_bo_alloc_request request = {0};
+	amdgpu_bo_handle bo;
+	amdgpu_va_handle va;
+	uint64_t addr;
+	int r;
+
+	if (num_buffers >= NUM_BUFFER_OBJECTS)
+		return -ENOSPC;
+
+	request.alloc_size = size;
+	request.phys_alignment = 0;
+	request.preferred_heap = domain;
+	request.flags = 0;
+	r = amdgpu_bo_alloc(device_handle, &request, &bo);
+	if (r)
+		return r;
+
+	r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general,
+				  size, 0, 0, &addr, &va, 0);
+	if (r)
+		goto err_free_bo;
+
+	r = amdgpu_bo_va_op_raw(device_handle, bo, 0, size, addr,
+				AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+				AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_MAP);
+	if (r)
+		goto err_free_va;
+
+	resources[num_buffers] = bo;
+	virtual[num_buffers] = addr;
+	fprintf(stdout, "Allocated BO number %u at 0x%" PRIx64
+		", domain 0x%x, size %" PRIu64 "\n",
+		num_buffers, addr, domain, size);
+	num_buffers++;
+	return 0;
+
+err_free_va:
+	amdgpu_va_range_free(va);
+err_free_bo:
+	amdgpu_bo_free(bo);
+	return r;
+}
+
+/**
+ * Build one SDMA copy IB and submit it count times.
+ *
+ * @param from   Index into virtual[] of the source BO.
+ * @param to     Index into virtual[] of the destination BO.
+ * @param size   Number of bytes to copy per submission.
+ * @param count  Number of times the IB is submitted.
+ *
+ * The copy is split into packets of at most MAX_COPY_BYTES, written through
+ * pm4 into BO 0. After the last submission the function waits for its fence
+ * and prints the elapsed time.
+ *
+ * Returns 0 on success, -EINVAL for an unallocated BO index, -ENOSPC if the
+ * packets do not fit into the IB buffer, -errno if reading the clock fails,
+ * or the error code of the failing libdrm call.
+ */
+int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count)
+{
+	const unsigned int max_dw = IB_SIZE_BYTES / sizeof(uint32_t);
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_fence fence_status;
+	struct amdgpu_cs_ib_info ib_info;
+	uint64_t copied = size, delta;
+	struct timespec start, stop;
+	unsigned int pkt_dw, i;
+	uint32_t expired, n;
+	uint64_t src, dst;
+	int r, r2;
+
+	/* Both indices come straight from the command line. */
+	if (from >= num_buffers || to >= num_buffers)
+		return -EINVAL;
+
+	src = virtual[from];
+	dst = virtual[to];
+
+	/* The SI copy packet below is 5 dwords long, the other one 7. */
+	pkt_dw = device_handle->info.family_id == AMDGPU_FAMILY_SI ? 5 : 7;
+
+	i = 0;
+	while (size) {
+		uint64_t bytes = size < MAX_COPY_BYTES ? size : MAX_COPY_BYTES;
+
+		/* Never write past the end of the IB buffer. */
+		if (i + pkt_dw > max_dw)
+			return -ENOSPC;
+
+		if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
+			pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
+						  bytes);
+			pm4[i++] = 0xffffffff & dst;
+			pm4[i++] = 0xffffffff & src;
+			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
+			pm4[i++] = (0xffffffff00000000 & src) >> 32;
+		} else {
+			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+					       SDMA_COPY_SUB_OPCODE_LINEAR,
+					       0);
+			if (device_handle->info.family_id >= AMDGPU_FAMILY_AI)
+				pm4[i++] = bytes - 1;
+			else
+				pm4[i++] = bytes;
+			pm4[i++] = 0;
+			pm4[i++] = 0xffffffff & src;
+			pm4[i++] = (0xffffffff00000000 & src) >> 32;
+			pm4[i++] = 0xffffffff & dst;
+			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
+		}
+
+		size -= bytes;
+		src += bytes;
+		dst += bytes;
+	}
+
+	memset(&ib_info, 0, sizeof(ib_info));
+	ib_info.ib_mc_address = virtual[0];
+	ib_info.size = i;
+
+	memset(&ibs_request, 0, sizeof(ibs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_DMA;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.fence_info.handle = NULL;
+
+	r = clock_gettime(CLOCK_MONOTONIC, &start);
+	if (r)
+		return -errno;
+
+	r = amdgpu_bo_list_create(device_handle, num_buffers, resources, NULL,
+				  &ibs_request.resources);
+	if (r)
+		return r;
+
+	for (n = 0; n < count; ++n) {
+		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
+		if (r)
+			break;
+	}
+
+	/* Destroy the list even when a submission failed. */
+	r2 = amdgpu_bo_list_destroy(ibs_request.resources);
+	if (r)
+		return r;
+	if (r2)
+		return r2;
+
+	memset(&fence_status, 0, sizeof(fence_status));
+	fence_status.ip_type = ibs_request.ip_type;
+	fence_status.ip_instance = 0;
+	fence_status.ring = ibs_request.ring;
+	fence_status.context = context_handle;
+	fence_status.fence = ibs_request.seq_no;
+	r = amdgpu_cs_query_fence_status(&fence_status,
+					 AMDGPU_TIMEOUT_INFINITE,
+					 0, &expired);
+	if (r)
+		return r;
+
+	r = clock_gettime(CLOCK_MONOTONIC, &stop);
+	if (r)
+		return -errno;
+
+	/* Widen before scaling so a 32 bit time_t/long cannot overflow. */
+	delta = (uint64_t)stop.tv_sec * 1000000000ULL + (uint64_t)stop.tv_nsec;
+	delta -= (uint64_t)start.tv_sec * 1000000000ULL + (uint64_t)start.tv_nsec;
+
+	fprintf(stdout, "Submitted %u IBs to copy from %u(%" PRIx64
+		") to %u(%" PRIx64 ") %" PRIu64 " bytes took %" PRIu64 " usec\n",
+		count, from, virtual[from], to, virtual[to], copied, delta / 1000);
+	return 0;
+}
+
+/**
+ * Fetch the next positional argument of a multi-argument option.
+ *
+ * getopt() only delivers the first argument of -b and -c; the remaining
+ * ones are taken directly from argv. On return optarg points at the
+ * argument and optind has been advanced past it.
+ *
+ * Prints msg and exits with EXIT_FAILURE when argv is exhausted or the next
+ * word starts with '-'.
+ */
+void next_arg(int argc, char **argv, const char *msg)
+{
+	if (optind >= argc || argv[optind][0] == '-') {
+		fprintf(stderr, "%s\n", msg);
+		exit(EXIT_FAILURE);
+	}
+	optarg = argv[optind++];
+}
+
+/**
+ * Parse optarg as a number with an optional k, m or g suffix.
+ *
+ * The number may be decimal, octal (leading 0) or hexadecimal (leading 0x).
+ * The case-insensitive suffix scales it by 1024, 1024^2 or 1024^3.
+ *
+ * Prints a message and exits with EXIT_FAILURE if optarg does not start
+ * with a digit, has characters left after the suffix, or the result does
+ * not fit into 64 bits.
+ */
+uint64_t parse_size(void)
+{
+	unsigned long long value;
+	unsigned int shift = 0;
+	char *end;
+
+	/* strtoull() itself would accept a sign or leading blanks. */
+	if (optarg[0] < '0' || optarg[0] > '9')
+		goto bad;
+
+	errno = 0;
+	value = strtoull(optarg, &end, 0);
+	if (end == optarg || errno == ERANGE)
+		goto bad;
+
+	switch (*end) {
+	case 'k':
+	case 'K':
+		shift = 10;
+		break;
+	case 'm':
+	case 'M':
+		shift = 20;
+		break;
+	case 'g':
+	case 'G':
+		shift = 30;
+		break;
+	default:
+		break;
+	}
+	if (shift)
+		end++;
+
+	if (*end != '\0' || value > (UINT64_MAX >> shift))
+		goto bad;
+
+	return (uint64_t)value << shift;
+
+bad:
+	fprintf(stderr, "Can't parse size arg: %s\n", optarg);
+	exit(EXIT_FAILURE);
+}
+
+/**
+ * Execute the -b and -c options in command line order.
+ *
+ * BO 0 is always an IB_SIZE_BYTES GTT buffer, CPU mapped through pm4, that
+ * holds the IB; buffers requested with -b therefore start at id 1.
+ */
+int main(int argc, char **argv)
+{
+	uint32_t major_version, minor_version;
+	uint32_t domain, from, to;
+	uint64_t size, count;
+	int fd, r, c;
+
+	/* Nothing to do: show the help before touching any hardware. */
+	if (argc == 1) {
+		fprintf(stderr, usage, argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	fd = amdgpu_open_device();
+	if (fd < 0) {
+		perror("Cannot open AMDGPU device");
+		exit(EXIT_FAILURE);
+	}
+
+	r = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);
+	if (r) {
+		fprintf(stderr, "amdgpu_device_initialize returned %d\n", r);
+		exit(EXIT_FAILURE);
+	}
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	if (r) {
+		fprintf(stderr, "amdgpu_cs_ctx_create returned %d\n", r);
+		exit(EXIT_FAILURE);
+	}
+
+	r = alloc_bo(AMDGPU_GEM_DOMAIN_GTT, IB_SIZE_BYTES);
+	if (r) {
+		fprintf(stderr, "Buffer allocation failed with %d\n", r);
+		exit(EXIT_FAILURE);
+	}
+
+	r = amdgpu_bo_cpu_map(resources[0], (void **)&pm4);
+	if (r) {
+		fprintf(stderr, "Buffer mapping failed with %d\n", r);
+		exit(EXIT_FAILURE);
+	}
+
+	opterr = 0;
+	/* Known start value so that case '?' can detect getopt() errors. */
+	optopt = 0;
+	while ((c = getopt(argc, argv, options)) != -1) {
+		switch (c) {
+		case 'b':
+			if (!strcmp(optarg, "v"))
+				domain = AMDGPU_GEM_DOMAIN_VRAM;
+			else if (!strcmp(optarg, "g"))
+				domain = AMDGPU_GEM_DOMAIN_GTT;
+			else if (!strcmp(optarg, "vg"))
+				domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
+			else {
+				fprintf(stderr, "Invalid domain: %s\n", optarg);
+				exit(EXIT_FAILURE);
+			}
+			next_arg(argc, argv, "Missing buffer size");
+			size = parse_size();
+			if (size < (uint64_t)getpagesize()) {
+				fprintf(stderr, "Buffer size to small %" PRIu64 "\n", size);
+				exit(EXIT_FAILURE);
+			}
+			r = alloc_bo(domain, size);
+			if (r) {
+				fprintf(stderr, "Buffer allocation failed with %d\n", r);
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'c':
+			if (sscanf(optarg, "%u", &from) != 1) {
+				fprintf(stderr, "Can't parse from buffer: %s\n", optarg);
+				exit(EXIT_FAILURE);
+			}
+			next_arg(argc, argv, "Missing to buffer");
+			if (sscanf(optarg, "%u", &to) != 1) {
+				fprintf(stderr, "Can't parse to buffer: %s\n", optarg);
+				exit(EXIT_FAILURE);
+			}
+			next_arg(argc, argv, "Missing size");
+			size = parse_size();
+			next_arg(argc, argv, "Missing count");
+			count = parse_size();
+			if (count > UINT32_MAX) {
+				fprintf(stderr, "Count too large: %s\n", optarg);
+				exit(EXIT_FAILURE);
+			}
+			r = submit_ib(from, to, size, (uint32_t)count);
+			if (r) {
+				fprintf(stderr, "IB submission failed with %d\n", r);
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case '?':
+			fprintf(stderr, usage, argv[0]);
+			/*
+			 * On an unknown option or a missing argument getopt()
+			 * stores the offending character in optopt; a plain
+			 * -? leaves it at 0 or '?'.
+			 */
+			exit(optopt && optopt != '?' ? EXIT_FAILURE : EXIT_SUCCESS);
+		case 'h':
+			fprintf(stderr, usage, argv[0]);
+			exit(EXIT_SUCCESS);
+		default:
+			fprintf(stderr, usage, argv[0]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index e6e30812..3a3b7601 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -33,3 +33,14 @@ if dep_cunit.found()
     install : with_install_tests,
   )
 endif
+
+amdgpu_stress = executable(
+  'amdgpu_stress',
+  files(
+    'amdgpu_stress.c'
+  ),
+  dependencies : [dep_threads, dep_atomic_ops],
+  include_directories : [inc_root, inc_drm, include_directories('../../amdgpu')],
+  link_with : [libdrm, libdrm_amdgpu],
+  install : with_install_tests,
+)