mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-27 22:53:55 +08:00
Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
A few more things for 4.13:
- Semaphore support using sync objects
- Drop fb location programming
- Optimize bo list ioctl

* 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux:
  drm/amdgpu: Optimize mutex usage (v4)
  drm/amdgpu: Optimization of AMDGPU_BO_LIST_OP_CREATE (v2)
  amdgpu: use drm sync objects for shared semaphores (v6)
  amdgpu/cs: split out fence dependency checking (v2)
  drm/amdgpu: don't check the default value for vm size
This commit is contained in:
commit
8c52f36413
@ -869,6 +869,8 @@ struct amdgpu_fpriv {
|
||||
|
||||
struct amdgpu_bo_list {
|
||||
struct mutex lock;
|
||||
struct rcu_head rhead;
|
||||
struct kref refcount;
|
||||
struct amdgpu_bo *gds_obj;
|
||||
struct amdgpu_bo *gws_obj;
|
||||
struct amdgpu_bo *oa_obj;
|
||||
@ -1159,6 +1161,9 @@ struct amdgpu_cs_parser {
|
||||
|
||||
/* user fence */
|
||||
struct amdgpu_bo_list_entry uf_entry;
|
||||
|
||||
unsigned num_post_dep_syncobjs;
|
||||
struct drm_syncobj **post_dep_syncobjs;
|
||||
};
|
||||
|
||||
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
|
||||
|
@ -35,33 +35,59 @@
|
||||
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
|
||||
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
|
||||
|
||||
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
|
||||
struct amdgpu_bo_list **result,
|
||||
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
|
||||
struct drm_file *filp,
|
||||
struct amdgpu_bo_list *list,
|
||||
struct drm_amdgpu_bo_list_entry *info,
|
||||
unsigned num_entries);
|
||||
|
||||
static void amdgpu_bo_list_release_rcu(struct kref *ref)
|
||||
{
|
||||
unsigned i;
|
||||
struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
|
||||
refcount);
|
||||
|
||||
for (i = 0; i < list->num_entries; ++i)
|
||||
amdgpu_bo_unref(&list->array[i].robj);
|
||||
|
||||
mutex_destroy(&list->lock);
|
||||
kvfree(list->array);
|
||||
kfree_rcu(list, rhead);
|
||||
}
|
||||
|
||||
static int amdgpu_bo_list_create(struct amdgpu_device *adev,
|
||||
struct drm_file *filp,
|
||||
struct drm_amdgpu_bo_list_entry *info,
|
||||
unsigned num_entries,
|
||||
int *id)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_fpriv *fpriv = filp->driver_priv;
|
||||
struct amdgpu_bo_list *list;
|
||||
|
||||
*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
|
||||
if (!*result)
|
||||
list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
|
||||
if (!list)
|
||||
return -ENOMEM;
|
||||
|
||||
/* initialize bo list*/
|
||||
mutex_init(&list->lock);
|
||||
kref_init(&list->refcount);
|
||||
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
|
||||
if (r) {
|
||||
kfree(list);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* idr alloc should be called only after initialization of bo list. */
|
||||
mutex_lock(&fpriv->bo_list_lock);
|
||||
r = idr_alloc(&fpriv->bo_list_handles, *result,
|
||||
1, 0, GFP_KERNEL);
|
||||
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
|
||||
mutex_unlock(&fpriv->bo_list_lock);
|
||||
if (r < 0) {
|
||||
mutex_unlock(&fpriv->bo_list_lock);
|
||||
kfree(*result);
|
||||
kfree(list);
|
||||
return r;
|
||||
}
|
||||
*id = r;
|
||||
|
||||
mutex_init(&(*result)->lock);
|
||||
(*result)->num_entries = 0;
|
||||
(*result)->array = NULL;
|
||||
|
||||
mutex_lock(&(*result)->lock);
|
||||
mutex_unlock(&fpriv->bo_list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
|
||||
|
||||
mutex_lock(&fpriv->bo_list_lock);
|
||||
list = idr_remove(&fpriv->bo_list_handles, id);
|
||||
if (list) {
|
||||
/* Another user may have a reference to this list still */
|
||||
mutex_lock(&list->lock);
|
||||
mutex_unlock(&list->lock);
|
||||
amdgpu_bo_list_free(list);
|
||||
}
|
||||
mutex_unlock(&fpriv->bo_list_lock);
|
||||
if (list)
|
||||
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
|
||||
}
|
||||
|
||||
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
|
||||
@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
|
||||
{
|
||||
struct amdgpu_bo_list *result;
|
||||
|
||||
mutex_lock(&fpriv->bo_list_lock);
|
||||
rcu_read_lock();
|
||||
result = idr_find(&fpriv->bo_list_handles, id);
|
||||
if (result)
|
||||
mutex_lock(&result->lock);
|
||||
mutex_unlock(&fpriv->bo_list_lock);
|
||||
|
||||
if (result) {
|
||||
if (kref_get_unless_zero(&result->refcount))
|
||||
mutex_lock(&result->lock);
|
||||
else
|
||||
result = NULL;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
|
||||
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
|
||||
{
|
||||
mutex_unlock(&list->lock);
|
||||
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
|
||||
}
|
||||
|
||||
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
|
||||
@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
|
||||
|
||||
switch (args->in.operation) {
|
||||
case AMDGPU_BO_LIST_OP_CREATE:
|
||||
r = amdgpu_bo_list_create(fpriv, &list, &handle);
|
||||
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
|
||||
&handle);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = amdgpu_bo_list_set(adev, filp, list, info,
|
||||
args->in.bo_number);
|
||||
amdgpu_bo_list_put(list);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
break;
|
||||
|
||||
case AMDGPU_BO_LIST_OP_DESTROY:
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <drm/drmP.h>
|
||||
#include <drm/amdgpu_drm.h>
|
||||
#include <drm/drm_syncobj.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_trace.h"
|
||||
|
||||
@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
|
||||
break;
|
||||
|
||||
case AMDGPU_CHUNK_ID_DEPENDENCIES:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
|
||||
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
|
||||
ttm_eu_backoff_reservation(&parser->ticket,
|
||||
&parser->validated);
|
||||
}
|
||||
|
||||
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
|
||||
drm_syncobj_put(parser->post_dep_syncobjs[i]);
|
||||
kfree(parser->post_dep_syncobjs);
|
||||
|
||||
dma_fence_put(parser->fence);
|
||||
|
||||
if (parser->ctx)
|
||||
@ -923,65 +931,150 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
|
||||
unsigned num_deps;
|
||||
int i, r;
|
||||
struct drm_amdgpu_cs_chunk_dep *deps;
|
||||
|
||||
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_dep);
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
struct amdgpu_ring *ring;
|
||||
struct amdgpu_ctx *ctx;
|
||||
struct dma_fence *fence;
|
||||
|
||||
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
|
||||
if (ctx == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
|
||||
deps[i].ip_type,
|
||||
deps[i].ip_instance,
|
||||
deps[i].ring, &ring);
|
||||
if (r) {
|
||||
amdgpu_ctx_put(ctx);
|
||||
return r;
|
||||
}
|
||||
|
||||
fence = amdgpu_ctx_get_fence(ctx, ring,
|
||||
deps[i].handle);
|
||||
if (IS_ERR(fence)) {
|
||||
r = PTR_ERR(fence);
|
||||
amdgpu_ctx_put(ctx);
|
||||
return r;
|
||||
} else if (fence) {
|
||||
r = amdgpu_sync_fence(p->adev, &p->job->sync,
|
||||
fence);
|
||||
dma_fence_put(fence);
|
||||
amdgpu_ctx_put(ctx);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
||||
uint32_t handle)
|
||||
{
|
||||
int r;
|
||||
struct dma_fence *fence;
|
||||
r = drm_syncobj_fence_get(p->filp, handle, &fence);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
|
||||
dma_fence_put(fence);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
unsigned num_deps;
|
||||
int i, r;
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
|
||||
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_sem);
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
|
||||
struct amdgpu_cs_chunk *chunk)
|
||||
{
|
||||
unsigned num_deps;
|
||||
int i;
|
||||
struct drm_amdgpu_cs_chunk_sem *deps;
|
||||
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_sem);
|
||||
|
||||
p->post_dep_syncobjs = kmalloc_array(num_deps,
|
||||
sizeof(struct drm_syncobj *),
|
||||
GFP_KERNEL);
|
||||
p->num_post_dep_syncobjs = 0;
|
||||
|
||||
for (i = 0; i < num_deps; ++i) {
|
||||
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
|
||||
if (!p->post_dep_syncobjs[i])
|
||||
return -EINVAL;
|
||||
p->num_post_dep_syncobjs++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
|
||||
struct amdgpu_cs_parser *p)
|
||||
{
|
||||
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
|
||||
int i, j, r;
|
||||
int i, r;
|
||||
|
||||
for (i = 0; i < p->nchunks; ++i) {
|
||||
struct drm_amdgpu_cs_chunk_dep *deps;
|
||||
struct amdgpu_cs_chunk *chunk;
|
||||
unsigned num_deps;
|
||||
|
||||
chunk = &p->chunks[i];
|
||||
|
||||
if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
|
||||
continue;
|
||||
|
||||
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
|
||||
num_deps = chunk->length_dw * 4 /
|
||||
sizeof(struct drm_amdgpu_cs_chunk_dep);
|
||||
|
||||
for (j = 0; j < num_deps; ++j) {
|
||||
struct amdgpu_ring *ring;
|
||||
struct amdgpu_ctx *ctx;
|
||||
struct dma_fence *fence;
|
||||
|
||||
ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
|
||||
if (ctx == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
|
||||
deps[j].ip_type,
|
||||
deps[j].ip_instance,
|
||||
deps[j].ring, &ring);
|
||||
if (r) {
|
||||
amdgpu_ctx_put(ctx);
|
||||
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
|
||||
r = amdgpu_cs_process_fence_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
fence = amdgpu_ctx_get_fence(ctx, ring,
|
||||
deps[j].handle);
|
||||
if (IS_ERR(fence)) {
|
||||
r = PTR_ERR(fence);
|
||||
amdgpu_ctx_put(ctx);
|
||||
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
|
||||
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
|
||||
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
} else if (fence) {
|
||||
r = amdgpu_sync_fence(adev, &p->job->sync,
|
||||
fence);
|
||||
dma_fence_put(fence);
|
||||
amdgpu_ctx_put(ctx);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
|
||||
drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
|
||||
p->fence);
|
||||
}
|
||||
}
|
||||
|
||||
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
union drm_amdgpu_cs *cs)
|
||||
{
|
||||
@ -1002,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
job->owner = p->filp;
|
||||
job->fence_ctx = entity->fence_context;
|
||||
p->fence = dma_fence_get(&job->base.s_fence->finished);
|
||||
|
||||
amdgpu_cs_post_dependencies(p);
|
||||
|
||||
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
|
||||
job->uf_sequence = cs->out.handle;
|
||||
amdgpu_job_free_resources(job);
|
||||
@ -1009,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
|
||||
trace_amdgpu_cs_ioctl(job);
|
||||
amd_sched_entity_push_job(&job->base);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1073,6 +1073,10 @@ def_value:
|
||||
|
||||
static void amdgpu_check_vm_size(struct amdgpu_device *adev)
|
||||
{
|
||||
/* no need to check the default value */
|
||||
if (amdgpu_vm_size == -1)
|
||||
return;
|
||||
|
||||
if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
|
||||
dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
|
||||
amdgpu_vm_size);
|
||||
|
@ -782,7 +782,7 @@ static struct drm_driver kms_driver = {
|
||||
.driver_features =
|
||||
DRIVER_USE_AGP |
|
||||
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
|
||||
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
|
||||
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
|
||||
.load = amdgpu_driver_load_kms,
|
||||
.open = amdgpu_driver_open_kms,
|
||||
.postclose = amdgpu_driver_postclose_kms,
|
||||
|
@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va {
|
||||
#define AMDGPU_CHUNK_ID_IB 0x01
|
||||
#define AMDGPU_CHUNK_ID_FENCE 0x02
|
||||
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
|
||||
#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
|
||||
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
|
||||
|
||||
struct drm_amdgpu_cs_chunk {
|
||||
__u32 chunk_id;
|
||||
@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence {
|
||||
__u32 offset;
|
||||
};
|
||||
|
||||
/*
 * One entry of an AMDGPU_CHUNK_ID_SYNCOBJ_IN or AMDGPU_CHUNK_ID_SYNCOBJ_OUT
 * chunk.  The chunk payload is an array of these entries.
 */
struct drm_amdgpu_cs_chunk_sem {
|
||||
/* sync object handle, resolved by the driver against the submitting file */
__u32 handle;
|
||||
};
|
||||
|
||||
struct drm_amdgpu_cs_chunk_data {
|
||||
union {
|
||||
struct drm_amdgpu_cs_chunk_ib ib_data;
|
||||
|
Loading…
Reference in New Issue
Block a user