amdgpu: add semaphore support

the semaphore is a binary semaphore. the work flow is:
1. create sem
2. signal sem
3. wait sem, reset sem after signalled
4. destroy sem.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
main
Marek Olšák 2016-01-12 22:13:07 +01:00
parent f06c992819
commit 6afadeaf13
3 changed files with 249 additions and 4 deletions

View File

@ -124,6 +124,11 @@ typedef struct amdgpu_bo_list *amdgpu_bo_list_handle;
*/
typedef struct amdgpu_va *amdgpu_va_handle;
/**
* Define handle for semaphore
*/
typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;
/*--------------------------------------------------------------------------*/
/* -------------------------- Structures ---------------------------------- */
/*--------------------------------------------------------------------------*/
@ -1180,4 +1185,64 @@ int amdgpu_bo_va_op(amdgpu_bo_handle bo,
uint64_t flags,
uint32_t ops);
/**
* create semaphore
*
* \param sem - \c [out] semaphore handle
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem);
/**
* signal semaphore
*
* \param context - \c [in] GPU Context
* \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
* \param ip_instance - \c [in] Index of the IP block of the same type
* \param ring - \c [in] Specify ring index of the IP
* \param sem - \c [in] semaphore handle
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
uint32_t ip_type,
uint32_t ip_instance,
uint32_t ring,
amdgpu_semaphore_handle sem);
/**
* wait semaphore
*
* \param context - \c [in] GPU Context
* \param ip_type - \c [in] Hardware IP block type = AMDGPU_HW_IP_*
* \param ip_instance - \c [in] Index of the IP block of the same type
* \param ring - \c [in] Specify ring index of the IP
* \param sem - \c [in] semaphore handle
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
uint32_t ip_type,
uint32_t ip_instance,
uint32_t ring,
amdgpu_semaphore_handle sem);
/**
* destroy semaphore
*
* \param sem - \c [in] semaphore handle
*
* \return 0 on success\n
* <0 - Negative POSIX Error code
*
*/
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
#endif /* #ifdef _AMDGPU_H_ */

View File

@ -40,6 +40,9 @@
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
/**
* Create command submission context
*
@ -53,6 +56,7 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
{
struct amdgpu_context *gpu_context;
union drm_amdgpu_ctx args;
int i, j, k;
int r;
if (NULL == dev)
@ -66,6 +70,10 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
gpu_context->dev = dev;
r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
if (r)
goto error;
/* Create the context */
memset(&args, 0, sizeof(args));
args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
@ -74,11 +82,16 @@ int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
goto error;
gpu_context->id = args.out.alloc.ctx_id;
for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
list_inithead(&gpu_context->sem_list[i][j][k]);
*context = (amdgpu_context_handle)gpu_context;
return 0;
error:
pthread_mutex_destroy(&gpu_context->sequence_mutex);
free(gpu_context);
return r;
}
@ -94,18 +107,32 @@ error:
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
union drm_amdgpu_ctx args;
int i, j, k;
int r;
if (NULL == context)
return -EINVAL;
pthread_mutex_destroy(&context->sequence_mutex);
/* now deal with kernel side */
memset(&args, 0, sizeof(args));
args.in.op = AMDGPU_CTX_OP_FREE_CTX;
args.in.ctx_id = context->id;
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
&args, sizeof(args));
for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
amdgpu_semaphore_handle sem;
LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
list_del(&sem->list);
amdgpu_cs_reset_sem(sem);
amdgpu_cs_unreference_sem(sem);
}
}
}
}
free(context);
return r;
@ -150,7 +177,10 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
struct drm_amdgpu_cs_chunk *chunks;
struct drm_amdgpu_cs_chunk_data *chunk_data;
struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
uint32_t i, size;
struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
struct list_head *sem_list;
amdgpu_semaphore_handle sem;
uint32_t i, size, sem_count = 0;
bool user_fence;
int r = 0;
@ -162,7 +192,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
return -EINVAL;
user_fence = (ibs_request->fence_info.handle != NULL);
size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
chunk_array = alloca(sizeof(uint64_t) * size);
chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
@ -196,6 +226,8 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
chunk_data[i].ib_data.flags = ib->flags;
}
pthread_mutex_lock(&context->sequence_mutex);
if (user_fence) {
i = cs.in.num_chunks++;
@ -240,15 +272,49 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
}
sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
LIST_FOR_EACH_ENTRY(sem, sem_list, list)
sem_count++;
if (sem_count) {
sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
if (!sem_dependencies) {
r = -ENOMEM;
goto error_unlock;
}
sem_count = 0;
LIST_FOR_EACH_ENTRY(sem, sem_list, list) {
struct amdgpu_cs_fence *info = &sem->signal_fence;
struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
dep->ip_type = info->ip_type;
dep->ip_instance = info->ip_instance;
dep->ring = info->ring;
dep->ctx_id = info->context->id;
dep->handle = info->fence;
list_del(&sem->list);
amdgpu_cs_reset_sem(sem);
amdgpu_cs_unreference_sem(sem);
}
i = cs.in.num_chunks++;
/* dependencies chunk */
chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
}
r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
&cs, sizeof(cs));
if (r)
goto error_unlock;
ibs_request->seq_no = cs.out.handle;
context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
pthread_mutex_unlock(&context->sequence_mutex);
free(dependencies);
free(sem_dependencies);
return r;
}
@ -369,3 +435,102 @@ int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
return r;
}
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
struct amdgpu_semaphore *gpu_semaphore;
if (NULL == sem)
return -EINVAL;
gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
if (NULL == gpu_semaphore)
return -ENOMEM;
atomic_set(&gpu_semaphore->refcount, 1);
*sem = gpu_semaphore;
return 0;
}
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
uint32_t ip_type,
uint32_t ip_instance,
uint32_t ring,
amdgpu_semaphore_handle sem)
{
if (NULL == ctx)
return -EINVAL;
if (ip_type >= AMDGPU_HW_IP_NUM)
return -EINVAL;
if (ring >= AMDGPU_CS_MAX_RINGS)
return -EINVAL;
if (NULL == sem)
return -EINVAL;
/* sem has been signaled */
if (sem->signal_fence.context)
return -EINVAL;
pthread_mutex_lock(&ctx->sequence_mutex);
sem->signal_fence.context = ctx;
sem->signal_fence.ip_type = ip_type;
sem->signal_fence.ip_instance = ip_instance;
sem->signal_fence.ring = ring;
sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
update_references(NULL, &sem->refcount);
pthread_mutex_unlock(&ctx->sequence_mutex);
return 0;
}
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
uint32_t ip_type,
uint32_t ip_instance,
uint32_t ring,
amdgpu_semaphore_handle sem)
{
if (NULL == ctx)
return -EINVAL;
if (ip_type >= AMDGPU_HW_IP_NUM)
return -EINVAL;
if (ring >= AMDGPU_CS_MAX_RINGS)
return -EINVAL;
if (NULL == sem)
return -EINVAL;
/* must signal first */
if (NULL == sem->signal_fence.context)
return -EINVAL;
pthread_mutex_lock(&ctx->sequence_mutex);
list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
pthread_mutex_unlock(&ctx->sequence_mutex);
return 0;
}
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
if (NULL == sem)
return -EINVAL;
if (NULL == sem->signal_fence.context)
return -EINVAL;
sem->signal_fence.context = NULL;;
sem->signal_fence.ip_type = 0;
sem->signal_fence.ip_instance = 0;
sem->signal_fence.ring = 0;
sem->signal_fence.fence = 0;
return 0;
}
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
if (NULL == sem)
return -EINVAL;
if (update_references(&sem->refcount, NULL))
free(sem);
return 0;
}
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
return amdgpu_cs_unreference_sem(sem);
}

View File

@ -111,8 +111,23 @@ struct amdgpu_bo_list {
struct amdgpu_context {
struct amdgpu_device *dev;
/** Mutex for accessing fences and to maintain command submissions
in good sequence. */
pthread_mutex_t sequence_mutex;
/* context id*/
uint32_t id;
uint64_t last_seq[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
struct list_head sem_list[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
};
/**
* Structure describing sw semaphore based on scheduler
*
*/
struct amdgpu_semaphore {
atomic_t refcount;
struct list_head list;
struct amdgpu_cs_fence signal_fence;
};
/**