drm/amdgpu: add new low overhead command submission API. (v2)

This just sends chunks to the kernel API for a single command
stream.

This should provide a more future proof and extensible API
for command submission.

v2: use amdgpu_bo_list_handle, add two helper functions to
access bo and context internals.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
main
Dave Airlie 2017-07-18 01:31:27 +01:00
parent 69532d0188
commit 22790a65d4
2 changed files with 77 additions and 0 deletions

View File

@ -1382,6 +1382,36 @@ int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
int shared_fd, int shared_fd,
uint32_t *syncobj); uint32_t *syncobj);
/**
* Submit raw command submission to kernel
*
* \param dev - \c [in] device handle
* \param context - \c [in] context handle for context id
* \param bo_list_handle - \c [in] request bo list handle (0 for none)
* \param num_chunks - \c [in] number of CS chunks to submit
* \param chunks - \c [in] array of CS chunks
* \param seq_no - \c [out] output sequence number for submission.
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
struct drm_amdgpu_cs_chunk;
struct drm_amdgpu_cs_chunk_dep;
struct drm_amdgpu_cs_chunk_data;
int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
amdgpu_context_handle context,
amdgpu_bo_list_handle bo_list_handle,
int num_chunks,
struct drm_amdgpu_cs_chunk *chunks,
uint64_t *seq_no);
void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
struct drm_amdgpu_cs_chunk_dep *dep);
void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
struct drm_amdgpu_cs_chunk_data *data);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -634,3 +634,50 @@ int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
return drmSyncobjFDToHandle(dev->fd, shared_fd, handle); return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
} }
int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
amdgpu_context_handle context,
amdgpu_bo_list_handle bo_list_handle,
int num_chunks,
struct drm_amdgpu_cs_chunk *chunks,
uint64_t *seq_no)
{
union drm_amdgpu_cs cs = {0};
uint64_t *chunk_array;
int i, r;
if (num_chunks == 0)
return -EINVAL;
chunk_array = alloca(sizeof(uint64_t) * num_chunks);
for (i = 0; i < num_chunks; i++)
chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
cs.in.ctx_id = context->id;
cs.in.bo_list_handle = bo_list_handle ? bo_list_handle->handle : 0;
cs.in.num_chunks = num_chunks;
r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
&cs, sizeof(cs));
if (r)
return r;
if (seq_no)
*seq_no = cs.out.handle;
return 0;
}
void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
struct drm_amdgpu_cs_chunk_data *data)
{
data->fence_data.handle = fence_info->handle->handle;
data->fence_data.offset = fence_info->offset * sizeof(uint64_t);
}
void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
struct drm_amdgpu_cs_chunk_dep *dep)
{
dep->ip_type = fence->ip_type;
dep->ip_instance = fence->ip_instance;
dep->ring = fence->ring;
dep->ctx_id = fence->context->id;
dep->handle = fence->fence;
}