amdgpu: add semaphore support
The semaphore is a binary semaphore. The workflow is: 1. create the semaphore; 2. signal the semaphore; 3. wait on the semaphore, which resets it once signalled; 4. destroy the semaphore. Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com>main
parent
f06c992819
commit
6afadeaf13
|
@ -124,6 +124,11 @@ typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
|
|||
*/
|
||||
typedef struct amdgpu_va *amdgpu_va_handle;

/**
 * Define handle for semaphore
 *
 * Opaque handle to a binary semaphore created by
 * amdgpu_cs_create_semaphore().
 */
typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;

/*--------------------------------------------------------------------------*/
/* -------------------------- Structures ---------------------------------- */
/*--------------------------------------------------------------------------*/
|
@ -1180,4 +1185,64 @@ int amdgpu_bo_va_op(amdgpu_bo_handle bo,
|
|||
uint64_t flags,
|
||||
uint32_t ops);
|
||||
|
||||
/**
 * create semaphore
 *
 * Allocates a new binary semaphore in the unsignalled state.
 *
 * \param   sem	- \c [out] semaphore handle
 *
 * \return   0 on success\n
 *          <0 - Negative POSIX Error code
 *
 */
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem);

/**
 * signal semaphore
 *
 * Associates the semaphore with the last command submission on the
 * given ring; a binary semaphore may only be signalled once before it
 * is waited on (which resets it).
 *
 * \param   context	- \c [in] GPU Context
 * \param   ip_type	- \c [in] Hardware IP block type = AMDGPU_HW_IP_*
 * \param   ip_instance	- \c [in] Index of the IP block of the same type
 * \param   ring	- \c [in] Specify ring index of the IP
 * \param   sem	- \c [in] semaphore handle
 *
 * \return   0 on success\n
 *          <0 - Negative POSIX Error code
 *
 */
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem);

/**
 * wait semaphore
 *
 * Makes subsequent submissions on the given ring depend on the fence
 * the semaphore was signalled with; the semaphore is reset after the
 * dependency is consumed.
 *
 * \param   context	- \c [in] GPU Context
 * \param   ip_type	- \c [in] Hardware IP block type = AMDGPU_HW_IP_*
 * \param   ip_instance	- \c [in] Index of the IP block of the same type
 * \param   ring	- \c [in] Specify ring index of the IP
 * \param   sem	- \c [in] semaphore handle
 *
 * \return   0 on success\n
 *          <0 - Negative POSIX Error code
 *
 */
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem);

/**
 * destroy semaphore
 *
 * Drops the caller's reference; the semaphore is freed once all
 * pending waits have consumed their references as well.
 *
 * \param   sem	- \c [in] semaphore handle
 *
 * \return   0 on success\n
 *          <0 - Negative POSIX Error code
 *
 */
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);

#endif /* #ifdef _AMDGPU_H_ */
|
||||
|
|
|
@ -40,6 +40,9 @@
|
|||
#include "amdgpu_drm.h"
|
||||
#include "amdgpu_internal.h"
|
||||
|
||||
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
|
||||
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
|
||||
|
||||
/**
|
||||
* Create command submission context
|
||||
*
|
||||
|
@ -53,6 +56,7 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
|
|||
{
|
||||
struct amdgpu_context *gpu_context;
|
||||
union drm_amdgpu_ctx args;
|
||||
int i, j, k;
|
||||
int r;
|
||||
|
||||
if (NULL == dev)
|
||||
|
@ -66,6 +70,10 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
|
|||
|
||||
gpu_context->dev = dev;
|
||||
|
||||
r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
|
||||
if (r)
|
||||
goto error;
|
||||
|
||||
/* Create the context */
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
|
||||
|
@ -74,11 +82,16 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
|
|||
goto error;
|
||||
|
||||
gpu_context->id = args.out.alloc.ctx_id;
|
||||
for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
|
||||
for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
|
||||
for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
|
||||
list_inithead(&gpu_context->sem_list[i][j][k]);
|
||||
*context = (amdgpu_context_handle)gpu_context;
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
pthread_mutex_destroy(&gpu_context->sequence_mutex);
|
||||
free(gpu_context);
|
||||
return r;
|
||||
}
|
||||
|
@ -94,18 +107,32 @@ error:
|
|||
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
|
||||
{
|
||||
union drm_amdgpu_ctx args;
|
||||
int i, j, k;
|
||||
int r;
|
||||
|
||||
if (NULL == context)
|
||||
return -EINVAL;
|
||||
|
||||
pthread_mutex_destroy(&context->sequence_mutex);
|
||||
|
||||
/* now deal with kernel side */
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.in.op = AMDGPU_CTX_OP_FREE_CTX;
|
||||
args.in.ctx_id = context->id;
|
||||
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
|
||||
&args, sizeof(args));
|
||||
|
||||
for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
|
||||
for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
|
||||
for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
|
||||
amdgpu_semaphore_handle sem;
|
||||
LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
|
||||
list_del(&sem->list);
|
||||
amdgpu_cs_reset_sem(sem);
|
||||
amdgpu_cs_unreference_sem(sem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(context);
|
||||
|
||||
return r;
|
||||
|
@ -150,7 +177,10 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
|||
struct drm_amdgpu_cs_chunk *chunks;
|
||||
struct drm_amdgpu_cs_chunk_data *chunk_data;
|
||||
struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
|
||||
uint32_t i, size;
|
||||
struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
|
||||
struct list_head *sem_list;
|
||||
amdgpu_semaphore_handle sem;
|
||||
uint32_t i, size, sem_count = 0;
|
||||
bool user_fence;
|
||||
int r = 0;
|
||||
|
||||
|
@ -162,7 +192,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
|||
return -EINVAL;
|
||||
user_fence = (ibs_request->fence_info.handle != NULL);
|
||||
|
||||
size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
|
||||
size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
|
||||
|
||||
chunk_array = alloca(sizeof(uint64_t) * size);
|
||||
chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
|
||||
|
@ -196,6 +226,8 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
|||
chunk_data[i].ib_data.flags = ib->flags;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&context->sequence_mutex);
|
||||
|
||||
if (user_fence) {
|
||||
i = cs.in.num_chunks++;
|
||||
|
||||
|
@ -240,15 +272,49 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
|
|||
chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
|
||||
}
|
||||
|
||||
sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
|
||||
LIST_FOR_EACH_ENTRY(sem, sem_list, list)
|
||||
sem_count++;
|
||||
if (sem_count) {
|
||||
sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
|
||||
if (!sem_dependencies) {
|
||||
r = -ENOMEM;
|
||||
goto error_unlock;
|
||||
}
|
||||
sem_count = 0;
|
||||
LIST_FOR_EACH_ENTRY(sem, sem_list, list) {
|
||||
struct amdgpu_cs_fence *info = &sem->signal_fence;
|
||||
struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
|
||||
dep->ip_type = info->ip_type;
|
||||
dep->ip_instance = info->ip_instance;
|
||||
dep->ring = info->ring;
|
||||
dep->ctx_id = info->context->id;
|
||||
dep->handle = info->fence;
|
||||
|
||||
list_del(&sem->list);
|
||||
amdgpu_cs_reset_sem(sem);
|
||||
amdgpu_cs_unreference_sem(sem);
|
||||
}
|
||||
i = cs.in.num_chunks++;
|
||||
|
||||
/* dependencies chunk */
|
||||
chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
|
||||
chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
|
||||
chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
|
||||
chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
|
||||
}
|
||||
|
||||
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
|
||||
&cs, sizeof(cs));
|
||||
if (r)
|
||||
goto error_unlock;
|
||||
|
||||
ibs_request->seq_no = cs.out.handle;
|
||||
|
||||
context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
|
||||
error_unlock:
|
||||
pthread_mutex_unlock(&context->sequence_mutex);
|
||||
free(dependencies);
|
||||
free(sem_dependencies);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -369,3 +435,102 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
|
|||
return r;
|
||||
}
|
||||
|
||||
/**
 * Allocate a new binary semaphore in the unsignalled state.
 *
 * The caller owns one reference on the returned handle; it is released
 * by amdgpu_cs_destroy_semaphore().
 */
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *new_sem;

	if (!sem)
		return -EINVAL;

	new_sem = calloc(1, sizeof(*new_sem));
	if (!new_sem)
		return -ENOMEM;

	/* one reference for the caller */
	atomic_set(&new_sem->refcount, 1);

	*sem = new_sem;
	return 0;
}
|
||||
|
||||
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
|
||||
uint32_t ip_type,
|
||||
uint32_t ip_instance,
|
||||
uint32_t ring,
|
||||
amdgpu_semaphore_handle sem)
|
||||
{
|
||||
if (NULL == ctx)
|
||||
return -EINVAL;
|
||||
if (ip_type >= AMDGPU_HW_IP_NUM)
|
||||
return -EINVAL;
|
||||
if (ring >= AMDGPU_CS_MAX_RINGS)
|
||||
return -EINVAL;
|
||||
if (NULL == sem)
|
||||
return -EINVAL;
|
||||
/* sem has been signaled */
|
||||
if (sem->signal_fence.context)
|
||||
return -EINVAL;
|
||||
pthread_mutex_lock(&ctx->sequence_mutex);
|
||||
sem->signal_fence.context = ctx;
|
||||
sem->signal_fence.ip_type = ip_type;
|
||||
sem->signal_fence.ip_instance = ip_instance;
|
||||
sem->signal_fence.ring = ring;
|
||||
sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
|
||||
update_references(NULL, &sem->refcount);
|
||||
pthread_mutex_unlock(&ctx->sequence_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
|
||||
uint32_t ip_type,
|
||||
uint32_t ip_instance,
|
||||
uint32_t ring,
|
||||
amdgpu_semaphore_handle sem)
|
||||
{
|
||||
if (NULL == ctx)
|
||||
return -EINVAL;
|
||||
if (ip_type >= AMDGPU_HW_IP_NUM)
|
||||
return -EINVAL;
|
||||
if (ring >= AMDGPU_CS_MAX_RINGS)
|
||||
return -EINVAL;
|
||||
if (NULL == sem)
|
||||
return -EINVAL;
|
||||
/* must signal first */
|
||||
if (NULL == sem->signal_fence.context)
|
||||
return -EINVAL;
|
||||
|
||||
pthread_mutex_lock(&ctx->sequence_mutex);
|
||||
list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
|
||||
pthread_mutex_unlock(&ctx->sequence_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Reset a signalled semaphore back to the unsignalled state.
 *
 * Clears the recorded signal fence so the semaphore can be signalled
 * again. Caller must hold the owning context's sequence_mutex or
 * otherwise guarantee exclusive access.
 *
 * \param sem  semaphore to reset; must currently be signalled
 *
 * \return 0 on success, negative POSIX error code on failure
 */
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	/* Fix: original had a stray double semicolon after NULL */
	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}
|
||||
|
||||
/**
 * Drop one reference on a semaphore, freeing it when the last
 * reference is released.
 */
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (!sem)
		return -EINVAL;

	/* update_references() reports when the count reached zero */
	if (update_references(&sem->refcount, NULL))
		free(sem);

	return 0;
}
|
||||
|
||||
/**
 * Destroy a semaphore: drops the caller's reference.
 *
 * The storage is freed immediately only when no pending signalled
 * waits still hold references (see amdgpu_cs_unreference_sem()).
 */
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}
|
||||
|
|
|
@ -111,8 +111,23 @@ struct amdgpu_bo_list {
|
|||
|
||||
struct amdgpu_context {
	struct amdgpu_device *dev;
	/** Mutex for accessing fences and to maintain command submissions
	    in good sequence. Also guards last_seq[] and sem_list[]. */
	pthread_mutex_t sequence_mutex;
	/* kernel context id returned by AMDGPU_CTX_OP_ALLOC_CTX */
	uint32_t id;
	/* last submitted sequence number, per (ip type, instance, ring) */
	uint64_t last_seq[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
	/* semaphores queued for wait on the next submission, per ring */
	struct list_head sem_list[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
};
|
||||
|
||||
/**
 * Structure describing sw semaphore based on scheduler
 *
 * A binary semaphore: signalled by recording a context's last fence
 * in signal_fence, consumed (and reset) when a submission on the
 * waiting ring emits that fence as a dependency.
 */
struct amdgpu_semaphore {
	/* freed when the count drops to zero (create + each signal hold one) */
	atomic_t refcount;
	/* link in amdgpu_context::sem_list while a wait is pending */
	struct list_head list;
	/* fence of the signalling submission; context == NULL means unsignalled */
	struct amdgpu_cs_fence signal_fence;
};
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue