amdgpu: add CS dependencies v2
This allows the driver to specify on which previous CS to wait.

v2: fix spelling in comment

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>

main
parent
cf5646001e
commit
0f37bc9029
|
@ -251,6 +251,30 @@ struct amdgpu_gds_resource_info {
|
||||||
uint32_t oa_per_compute_partition;
|
uint32_t oa_per_compute_partition;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure describing a CS dependency: one fence of a previous command submission that a new submission waits for before starting execution.
|
||||||
|
*
|
||||||
|
* \sa amdgpu_cs_request, amdgpu_cs_submit()
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
struct amdgpu_cs_dep_info {
|
||||||
|
/** Context to which the fence belongs; amdgpu_cs_submit_one() passes its id on as ctx_id */
|
||||||
|
amdgpu_context_handle context;
|
||||||
|
|
||||||
|
/** To which HW IP type the fence belongs */
|
||||||
|
uint32_t ip_type;
|
||||||
|
|
||||||
|
/** IP instance index if there are several IPs of the same type. */
|
||||||
|
uint32_t ip_instance;
|
||||||
|
|
||||||
|
/** Ring index of the HW IP */
|
||||||
|
uint32_t ring;
|
||||||
|
|
||||||
|
/** Fence of the previous submission to wait for; amdgpu_cs_submit_one()
|
||||||
|
* copies it into the handle field of the dependency chunk entry. */
|
||||||
|
uint64_t fence;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structure describing IB
|
* Structure describing IB
|
||||||
*
|
*
|
||||||
|
@ -301,6 +325,18 @@ struct amdgpu_cs_request {
|
||||||
*/
|
*/
|
||||||
amdgpu_bo_list_handle resources;
|
amdgpu_bo_list_handle resources;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of dependencies this Command submission needs to
|
||||||
|
* wait for before starting execution.
|
||||||
|
*/
|
||||||
|
uint32_t number_of_dependencies;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Array of dependencies which need to be met before
|
||||||
|
* execution can start.
|
||||||
|
*/
|
||||||
|
struct amdgpu_cs_dep_info *dependencies;
|
||||||
|
|
||||||
/** Number of IBs to submit in the field ibs. */
|
/** Number of IBs to submit in the field ibs. */
|
||||||
uint32_t number_of_ibs;
|
uint32_t number_of_ibs;
|
||||||
|
|
||||||
|
|
|
@ -186,6 +186,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
||||||
uint64_t *chunk_array;
|
uint64_t *chunk_array;
|
||||||
struct drm_amdgpu_cs_chunk *chunks;
|
struct drm_amdgpu_cs_chunk *chunks;
|
||||||
struct drm_amdgpu_cs_chunk_data *chunk_data;
|
struct drm_amdgpu_cs_chunk_data *chunk_data;
|
||||||
|
struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
|
||||||
uint32_t i, size;
|
uint32_t i, size;
|
||||||
int r = 0;
|
int r = 0;
|
||||||
|
|
||||||
|
@ -196,10 +197,12 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
||||||
if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
|
if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
size = ibs_request->number_of_ibs + 1;
|
size = ibs_request->number_of_ibs + 2;
|
||||||
|
|
||||||
chunk_array = alloca(sizeof(uint64_t) * size);
|
chunk_array = alloca(sizeof(uint64_t) * size);
|
||||||
chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
|
chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
|
||||||
|
|
||||||
|
size = ibs_request->number_of_ibs + 1;
|
||||||
chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
|
chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
|
||||||
|
|
||||||
memset(&cs, 0, sizeof(cs));
|
memset(&cs, 0, sizeof(cs));
|
||||||
|
@ -247,6 +250,34 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
||||||
chunk_data[i].fence_data.offset *= sizeof(uint64_t);
|
chunk_data[i].fence_data.offset *= sizeof(uint64_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ibs_request->number_of_dependencies) {
|
||||||
|
dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
|
||||||
|
ibs_request->number_of_dependencies);
|
||||||
|
if (!dependencies) {
|
||||||
|
r = -ENOMEM;
|
||||||
|
goto error_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
|
||||||
|
struct amdgpu_cs_dep_info *info = &ibs_request->dependencies[i];
|
||||||
|
struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
|
||||||
|
dep->ip_type = info->ip_type;
|
||||||
|
dep->ip_instance = info->ip_instance;
|
||||||
|
dep->ring = info->ring;
|
||||||
|
dep->ctx_id = info->context->id;
|
||||||
|
dep->handle = info->fence;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = cs.in.num_chunks++;
|
||||||
|
|
||||||
|
/* dependencies chunk */
|
||||||
|
chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
|
||||||
|
chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
|
||||||
|
chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
|
||||||
|
* ibs_request->number_of_dependencies;
|
||||||
|
chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
|
||||||
|
}
|
||||||
|
|
||||||
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
|
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
|
||||||
&cs, sizeof(cs));
|
&cs, sizeof(cs));
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -256,6 +287,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
||||||
|
|
||||||
error_unlock:
|
error_unlock:
|
||||||
pthread_mutex_unlock(&context->sequence_mutex);
|
pthread_mutex_unlock(&context->sequence_mutex);
|
||||||
|
free(dependencies);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue