amdgpu: Add vamgr for capture/replay.

In Vulkan we have extensions to assist with capture and replay in a
world where GPU addresses are returned to the application. This
involves creating buffers at the same VA during replay as they
occupied during capture.

By itself libdrm_amdgpu already has support for this, but there is an
obvious failure mode: if another buffer is already allocated at that
VA, things fail spectacularly. This is a real issue because internal
buffers, like winsys images or shader binaries, participate in the
same VA allocation.

To avoid this problem, applications have to create the buffers that
are going to be captured with a dedicated flag, and the implementation
is expected to separate VA allocation for those buffers to reduce the
collision risk:

"Implementations are expected to separate such buffers in the GPU address
space so normal allocations will avoid using these addresses. Apps/tools
should avoid mixing app-provided and implementation-provided addresses for
buffers created with VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
to avoid address space allocation conflicts."

This patch implements that by adding a flag (AMDGPU_VA_RANGE_REPLAYABLE)
for these allocations and handing out their address space from the top
of the address range instead of the bottom.
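
As a rough usage sketch (not part of this patch; the helper name is
made up, only amdgpu_va_range_alloc() and the new flag are real), a
userspace driver could route capture/replay buffers through the new
flag like this:

#include <amdgpu.h>

/* Hypothetical helper: reserve a VA range for a buffer created with
 * VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT. Ranges allocated
 * with AMDGPU_VA_RANGE_REPLAYABLE come from the top of the address
 * range, away from ordinary bottom-up allocations. */
static int alloc_replayable_va(amdgpu_device_handle dev, uint64_t size,
			       uint64_t alignment, uint64_t *va,
			       amdgpu_va_handle *handle)
{
	return amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
				     size, alignment,
				     0 /* no fixed base required */,
				     va, handle,
				     AMDGPU_VA_RANGE_REPLAYABLE);
}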

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Bas Nieuwenhuizen 2020-11-23 02:18:05 +01:00
parent d615430c68
commit 085ee3e488
2 changed files with 85 additions and 49 deletions

amdgpu/amdgpu.h

@@ -1280,6 +1280,7 @@ int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset,
  */
 #define AMDGPU_VA_RANGE_32_BIT		0x1
 #define AMDGPU_VA_RANGE_HIGH		0x2
+#define AMDGPU_VA_RANGE_REPLAYABLE	0x4
 
 /**
  * Allocate virtual address range

amdgpu/amdgpu_vamgr.c

@@ -69,65 +69,99 @@ drm_private void amdgpu_vamgr_deinit(struct amdgpu_bo_va_mgr *mgr)
 	pthread_mutex_destroy(&mgr->bo_va_mutex);
 }
 
-static drm_private uint64_t
+static drm_private int
+amdgpu_vamgr_subtract_hole(struct amdgpu_bo_va_hole *hole, uint64_t start_va,
+			   uint64_t end_va)
+{
+	if (start_va > hole->offset && end_va - hole->offset < hole->size) {
+		struct amdgpu_bo_va_hole *n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
+		if (!n)
+			return -ENOMEM;
+
+		n->size = start_va - hole->offset;
+		n->offset = hole->offset;
+		list_add(&n->list, &hole->list);
+
+		hole->size -= (end_va - hole->offset);
+		hole->offset = end_va;
+	} else if (start_va > hole->offset) {
+		hole->size = start_va - hole->offset;
+	} else if (end_va - hole->offset < hole->size) {
+		hole->size -= (end_va - hole->offset);
+		hole->offset = end_va;
+	} else {
+		list_del(&hole->list);
+		free(hole);
+	}
+
+	return 0;
+}
+
+static drm_private int
 amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, uint64_t size,
-		     uint64_t alignment, uint64_t base_required)
+		     uint64_t alignment, uint64_t base_required,
+		     bool search_from_top, uint64_t *va_out)
 {
 	struct amdgpu_bo_va_hole *hole, *n;
-	uint64_t offset = 0, waste = 0;
+	uint64_t offset = 0;
+	int ret;
 
 	alignment = MAX2(alignment, mgr->va_alignment);
 	size = ALIGN(size, mgr->va_alignment);
 
 	if (base_required % alignment)
-		return AMDGPU_INVALID_VA_ADDRESS;
+		return -EINVAL;
 
 	pthread_mutex_lock(&mgr->bo_va_mutex);
-	LIST_FOR_EACH_ENTRY_SAFE_REV(hole, n, &mgr->va_holes, list) {
-		if (base_required) {
-			if (hole->offset > base_required ||
-			    (hole->offset + hole->size) < (base_required + size))
-				continue;
-			waste = base_required - hole->offset;
-			offset = base_required;
-		} else {
-			offset = hole->offset;
-			waste = offset % alignment;
-			waste = waste ? alignment - waste : 0;
-			offset += waste;
-			if (offset >= (hole->offset + hole->size)) {
-				continue;
+	if (!search_from_top) {
+		LIST_FOR_EACH_ENTRY_SAFE_REV(hole, n, &mgr->va_holes, list) {
+			if (base_required) {
+				if (hole->offset > base_required ||
+				    (hole->offset + hole->size) < (base_required + size))
+					continue;
+				offset = base_required;
+			} else {
+				uint64_t waste = hole->offset % alignment;
+				waste = waste ? alignment - waste : 0;
+				offset = hole->offset + waste;
+				if (offset >= (hole->offset + hole->size) ||
+				    size > (hole->offset + hole->size) - offset) {
+					continue;
+				}
 			}
-		}
-		if (!waste && hole->size == size) {
-			offset = hole->offset;
-			list_del(&hole->list);
-			free(hole);
+
+			ret = amdgpu_vamgr_subtract_hole(hole, offset, offset + size);
 			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
+			*va_out = offset;
+			return ret;
 		}
-		if ((hole->size - waste) > size) {
-			if (waste) {
-				n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
-				n->size = waste;
-				n->offset = hole->offset;
-				list_add(&n->list, &hole->list);
+	} else {
+		LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+			if (base_required) {
+				if (hole->offset > base_required ||
+				    (hole->offset + hole->size) < (base_required + size))
+					continue;
+				offset = base_required;
+			} else {
+				if (size > hole->size)
+					continue;
+
+				offset = hole->offset + hole->size - size;
+				offset -= offset % alignment;
+				if (offset < hole->offset) {
+					continue;
+				}
 			}
-			hole->size -= (size + waste);
-			hole->offset += size + waste;
+
+			ret = amdgpu_vamgr_subtract_hole(hole, offset, offset + size);
 			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
-		}
-		if ((hole->size - waste) == size) {
-			hole->size = waste;
-			pthread_mutex_unlock(&mgr->bo_va_mutex);
-			return offset;
+			*va_out = offset;
+			return ret;
 		}
 	}
+
 	pthread_mutex_unlock(&mgr->bo_va_mutex);
-	return AMDGPU_INVALID_VA_ADDRESS;
+	return -ENOMEM;
 }
 
 static drm_private void
@@ -196,6 +230,8 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 			      uint64_t flags)
 {
 	struct amdgpu_bo_va_mgr *vamgr;
+	bool search_from_top = !!(flags & AMDGPU_VA_RANGE_REPLAYABLE);
+	int ret;
 
 	/* Clear the flag when the high VA manager is not initialized */
 	if (flags & AMDGPU_VA_RANGE_HIGH && !dev->vamgr_high_32.va_max)
@@ -216,21 +252,22 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 	va_base_alignment = MAX2(va_base_alignment, vamgr->va_alignment);
 	size = ALIGN(size, vamgr->va_alignment);
 
-	*va_base_allocated = amdgpu_vamgr_find_va(vamgr, size,
-					va_base_alignment, va_base_required);
+	ret = amdgpu_vamgr_find_va(vamgr, size,
+				   va_base_alignment, va_base_required,
+				   search_from_top, va_base_allocated);
 
-	if (!(flags & AMDGPU_VA_RANGE_32_BIT) &&
-	    (*va_base_allocated == AMDGPU_INVALID_VA_ADDRESS)) {
+	if (!(flags & AMDGPU_VA_RANGE_32_BIT) && ret) {
 		/* fallback to 32bit address */
 		if (flags & AMDGPU_VA_RANGE_HIGH)
 			vamgr = &dev->vamgr_high_32;
 		else
 			vamgr = &dev->vamgr_32;
-		*va_base_allocated = amdgpu_vamgr_find_va(vamgr, size,
-					va_base_alignment, va_base_required);
+		ret = amdgpu_vamgr_find_va(vamgr, size,
+					   va_base_alignment, va_base_required,
+					   search_from_top, va_base_allocated);
 	}
 
-	if (*va_base_allocated != AMDGPU_INVALID_VA_ADDRESS) {
+	if (!ret) {
 		struct amdgpu_va* va;
 		va = calloc(1, sizeof(struct amdgpu_va));
 		if(!va){
@@ -243,11 +280,9 @@ drm_public int amdgpu_va_range_alloc(amdgpu_device_handle dev,
 		va->range = va_range_type;
 		va->vamgr = vamgr;
 		*va_range_handle = va;
-	} else {
-		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 drm_public int amdgpu_va_range_free(amdgpu_va_handle va_range_handle)
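
For readers skimming the diff: the only real difference between the new
top-down path and the existing bottom-up one is where inside a hole the
block is placed. A small, self-contained sketch of that placement
arithmetic (illustrative only, not code from this patch):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors the placement step of the search_from_top
 * branch. Returns the chosen start address, or UINT64_MAX if an aligned
 * block of the requested size does not fit in the hole. */
static uint64_t place_from_top(uint64_t hole_offset, uint64_t hole_size,
			       uint64_t size, uint64_t alignment)
{
	uint64_t offset;

	if (size > hole_size)
		return UINT64_MAX;

	offset = hole_offset + hole_size - size; /* end of the hole */
	offset -= offset % alignment;            /* align downwards */
	if (offset < hole_offset)
		return UINT64_MAX;

	return offset;
}

int main(void)
{
	/* Hole [0x1000, 0x5000), request 0x900 bytes at 0x1000 alignment:
	 * 0x5000 - 0x900 = 0x4700, aligned down to 0x4000. */
	printf("0x%" PRIx64 "\n", place_from_top(0x1000, 0x4000, 0x900, 0x1000));
	return 0;
}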