amdgpu: Add vamgr for capture/replay.

In Vulkan we have extensions to assist with capture and replay in a
world where GPU addresses are returned to the application. This
involves creating buffers at the same VA during replay as they
occupied during capture.

By itself libdrm_amdgpu already has support for this, but there is an
obvious failure mode: if another buffer is already allocated at that
VA, things fail spectacularly. This is a real issue because internal
buffers, like winsys images or shader binaries, participate in the
same VA allocation.

To avoid this problem, applications have to create the buffers that
are going to be captured with a dedicated flag, and the implementation
is expected to separate VA allocation for those buffers to reduce the
collision risk:

"Implementations are expected to separate such buffers in the GPU address
space so normal allocations will avoid using these addresses. Apps/tools
should avoid mixing app-provided and implementation-provided addresses for
buffers created with VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
to avoid address space allocation conflicts."

This patch implements that by adding a flag (AMDGPU_VA_RANGE_REPLAYABLE)
for these allocations and handing out their address space from the top
of the address range instead of the bottom.
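
As a rough usage sketch (not part of this patch; the helper name is
made up, only amdgpu_va_range_alloc() and the new flag are real), a
userspace driver could route capture/replay buffers through the new
flag like this:

#include <amdgpu.h>

/* Hypothetical helper: reserve a VA range for a buffer created with
 * VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT. Ranges allocated
 * with AMDGPU_VA_RANGE_REPLAYABLE come from the top of the address
 * range, away from ordinary bottom-up allocations. */
static int alloc_replayable_va(amdgpu_device_handle dev, uint64_t size,
			       uint64_t alignment, uint64_t *va,
			       amdgpu_va_handle *handle)
{
	return amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
				     size, alignment,
				     0 /* no fixed base required */,
				     va, handle,
				     AMDGPU_VA_RANGE_REPLAYABLE);
}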

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Bas Nieuwenhuizen 2020-11-23 02:18:05 +01:00
parent d615430c68
commit 085ee3e488
2 changed files with 85 additions and 49 deletions

amdgpu/amdgpu.h

@@ -1280,6 +1280,7 @@ int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset,
  */
 #define AMDGPU_VA_RANGE_32_BIT		0x1
 #define AMDGPU_VA_RANGE_HIGH		0x2
+#define AMDGPU_VA_RANGE_REPLAYABLE	0x4
 
 /**
  * Allocate virtual address range

amdgpu/amdgpu_vamgr.c

@@ -69,65 +69,99 @@ drm_private void amdgpu_vamgr_deinit(struct amdgpu_bo_va_mgr *mgr)
 	pthread_mutex_destroy(&mgr->bo_va_mutex);
 }
 
-static drm_private uint64_t
+static drm_private int
+amdgpu_vamgr_subtract_hole(struct amdgpu_bo_va_hole *hole, uint64_t start_va,
+			   uint64_t end_va)
+{
+	if (start_va > hole->offset && end_va - hole->offset < hole->size) {
+		struct amdgpu_bo_va_hole *n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
+		if (!n)
+			return -ENOMEM;
+
+		n->size = start_va - hole->offset;
+		n->offset = hole->offset;
+		list_add(&n->list, &hole->list);
+
+		hole->size -= (end_va - hole->offset);
+		hole->offset = end_va;
+	} else if (start_va > hole->offset) {
+		hole->size = start_va - hole->offset;
+	} else if (end_va - hole->offset < hole->size) {
+		hole->size -= (end_va - hole->offset);
+		hole->offset = end_va;
+	} else {
+		list_del(&hole->list);
+		free(hole);
+	}
+
+	return 0;
+}
+
+static drm_private int
 amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
-		     uint64_t alignment, uint64_t base_required)
+		     uint64_t alignment, uint64_t base_required,
+		     bool search_from_top, uint64_t *va_out)
 {
 	struct amdgpu_bo_va_hole *hole, *n;
-	uint64_t offset = 0, waste = 0;
+	uint64_t offset = 0;
+	int ret;
 
 	alignment = MAX2(alignment, mgr->va_alignment);
 	size = ALIGN(size, mgr->va_alignment);
 
 	if (base_required % alignment)
-		return AMDGPU_INVALID_VA_ADDRESS;
+		return -EINVAL;
 
 	pthread_mutex_lock(&mgr->bo_va_mutex);
-	LIST_FOR_EACH_ENTRY_SAFE_REV(hole, n, &mgr->va_holes, list) {
-		if (base_required) {
-			if (hole->offset > base_required ||
-			    (hole->offset + hole->size) < (base_required + size))
-				continue;
-			waste = base_required - hole->offset;
-			offset = base_required;
-		} else {
-			offset = hole->offset;
-			waste = offset % alignment;
-			waste = waste ? alignment - waste : 0;
-			offset += waste;
-			if (offset >= (hole->offset + hole->size)) {
-				continue;
+	if (!search_from_top) {
+		LIST_FOR_EACH_ENTRY_SAFE_REV(hole, n, &mgr->va_holes, list) {
+			if (base_required) {
+				if (hole->offset > base_required ||
+				    (hole->offset + hole->size) < (base_required + size))
+					continue;
+				offset = base_required;
+			} else {
+				uint64_t waste = hole->offset % alignment;
+				waste = waste ? alignment - waste : 0;
+				offset = hole->offset + waste;
+				if (offset >= (hole->offset + hole->size) ||
+				    size > (hole->offset + hole->size) - offset) {
+					continue;
+				}
 			}
-		}
-		if (!waste && hole->size == size) {
-			offset = hole->offset;
-			list_del(&hole->list);
-			free(hole);
+
+			ret = amdgpu_vamgr_subtract_hole(hole, offset, offset + size);
 			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
+			*va_out = offset;
+			return ret;
 		}
-		if ((hole->size - waste) > size) {
-			if (waste) {
-				n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
-				n->size = waste;
-				n->offset = hole->offset;
-				list_add(&n->list, &hole->list);
+	} else {
+		LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+			if (base_required) {
+				if (hole->offset > base_required ||
+				    (hole->offset + hole->size) < (base_required + size))
+					continue;
+				offset = base_required;
+			} else {
+				if (size > hole->size)
+					continue;
+
+				offset = hole->offset + hole->size - size;
+				offset -= offset % alignment;
+				if (offset < hole->offset) {
+					continue;
+				}
 			}
-			hole->size -= (size + waste);
-			hole->offset += size + waste;
+
+			ret = amdgpu_vamgr_subtract_hole(hole, offset, offset + size);
 			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
-		}
-		if ((hole->size - waste) == size) {
-			hole->size = waste;
-			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
+			*va_out = offset;
+			return ret;
 		}
 	}
+
 	pthread_mutex_unlock(&mgr->bo_va_mutex);
-	return AMDGPU_INVALID_VA_ADDRESS;
+	return -ENOMEM;
 }
 
 static drm_private void
@@ -196,6 +230,8 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 			      uint64_t flags)
 {
 	struct amdgpu_bo_va_mgr *vamgr;
+	bool search_from_top = !!(flags & AMDGPU_VA_RANGE_REPLAYABLE);
+	int ret;
 
 	/* Clear the flag when the high VA manager is not initialized */
 	if (flags & AMDGPU_VA_RANGE_HIGH && !dev->vamgr_high_32.va_max)
@@ -216,21 +252,22 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 	va_base_alignment = MAX2(va_base_alignment, vamgr->va_alignment);
 	size = ALIGN(size, vamgr->va_alignment);
 
-	*va_base_allocated = amdgpu_vamgr_find_va(vamgr, size,
-					va_base_alignment, va_base_required);
+	ret = amdgpu_vamgr_find_va(vamgr, size,
+				   va_base_alignment, va_base_required,
+				   search_from_top, va_base_allocated);
 
-	if (!(flags & AMDGPU_VA_RANGE_32_BIT) &&
-	    (*va_base_allocated == AMDGPU_INVALID_VA_ADDRESS)) {
+	if (!(flags & AMDGPU_VA_RANGE_32_BIT) && ret) {
 		/* fallback to 32bit address */
 		if (flags & AMDGPU_VA_RANGE_HIGH)
 			vamgr = &dev->vamgr_high_32;
 		else
 			vamgr = &dev->vamgr_32;
-		*va_base_allocated = amdgpu_vamgr_find_va(vamgr, size,
-					va_base_alignment, va_base_required);
+		ret = amdgpu_vamgr_find_va(vamgr, size,
+					   va_base_alignment, va_base_required,
+					   search_from_top, va_base_allocated);
 	}
 
-	if (*va_base_allocated != AMDGPU_INVALID_VA_ADDRESS) {
+	if (!ret) {
 		struct amdgpu_va* va;
 		va = calloc(1, sizeof(struct amdgpu_va));
 		if(!va){
@@ -243,11 +280,9 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 		va->range = va_range_type;
 		va->vamgr = vamgr;
 		*va_range_handle = va;
-	} else {
-		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 drm_public int amdgpu_va_range_free(amdgpu_va_handle va_range_handle)
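
For readers skimming the diff: the only real difference between the new
top-down path and the existing bottom-up one is where inside a hole the
block is placed. A small, self-contained sketch of that placement
arithmetic (illustrative only, not code from this patch):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the placement step of the search_from_top
 * branch. Returns the chosen start address, or UINT64_MAX if an aligned
 * block of the requested size does not fit in the hole. */
static uint64_t place_from_top(uint64_t hole_offset, uint64_t hole_size,
			       uint64_t size, uint64_t alignment)
{
	uint64_t offset;

	if (size > hole_size)
		return UINT64_MAX;

	offset = hole_offset + hole_size - size; /* end of the hole */
	offset -= offset % alignment;            /* align downwards */
	if (offset < hole_offset)
		return UINT64_MAX;

	return offset;
}

int main(void)
{
	/* Hole [0x1000, 0x5000), request 0x900 bytes at 0x1000 alignment:
	 * 0x5000 - 0x900 = 0x4700, aligned down to 0x4000. */
	printf("0x%" PRIx64 "\n", place_from_top(0x1000, 0x4000, 0x900, 0x1000));
	return 0;
}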