Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next

A few more things for 4.13:
- Semaphore support using sync objects
- Drop fb location programming
- Optimize bo list ioctl

* 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux:
  drm/amdgpu: Optimize mutex usage (v4)
  drm/amdgpu: Optimization of AMDGPU_BO_LIST_OP_CREATE (v2)
  amdgpu: use drm sync objects for shared semaphores (v6)
  amdgpu/cs: split out fence dependency checking (v2)
  drm/amdgpu: don't check the default value for vm size
This commit is contained in:
Dave Airlie 2017-06-20 11:19:08 +10:00
commit 8c52f36413
6 changed files with 210 additions and 77 deletions

View File

@ -869,6 +869,8 @@ struct amdgpu_fpriv {
struct amdgpu_bo_list { struct amdgpu_bo_list {
struct mutex lock; struct mutex lock;
struct rcu_head rhead;
struct kref refcount;
struct amdgpu_bo *gds_obj; struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj; struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj; struct amdgpu_bo *oa_obj;
@ -1159,6 +1161,9 @@ struct amdgpu_cs_parser {
/* user fence */ /* user fence */
struct amdgpu_bo_list_entry uf_entry; struct amdgpu_bo_list_entry uf_entry;
unsigned num_post_dep_syncobjs;
struct drm_syncobj **post_dep_syncobjs;
}; };
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */

View File

@ -35,33 +35,59 @@
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct amdgpu_bo_list **result, struct drm_file *filp,
struct amdgpu_bo_list *list,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries);
static void amdgpu_bo_list_release_rcu(struct kref *ref)
{
unsigned i;
struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
refcount);
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
mutex_destroy(&list->lock);
kvfree(list->array);
kfree_rcu(list, rhead);
}
static int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
int *id) int *id)
{ {
int r; int r;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo_list *list;
*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
if (!*result) if (!list)
return -ENOMEM; return -ENOMEM;
/* initialize bo list*/
mutex_init(&list->lock);
kref_init(&list->refcount);
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
if (r) {
kfree(list);
return r;
}
/* idr alloc should be called only after initialization of bo list. */
mutex_lock(&fpriv->bo_list_lock); mutex_lock(&fpriv->bo_list_lock);
r = idr_alloc(&fpriv->bo_list_handles, *result, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) { if (r < 0) {
mutex_unlock(&fpriv->bo_list_lock); kfree(list);
kfree(*result);
return r; return r;
} }
*id = r; *id = r;
mutex_init(&(*result)->lock);
(*result)->num_entries = 0;
(*result)->array = NULL;
mutex_lock(&(*result)->lock);
mutex_unlock(&fpriv->bo_list_lock);
return 0; return 0;
} }
@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
mutex_lock(&fpriv->bo_list_lock); mutex_lock(&fpriv->bo_list_lock);
list = idr_remove(&fpriv->bo_list_handles, id); list = idr_remove(&fpriv->bo_list_handles, id);
if (list) {
/* Another user may have a reference to this list still */
mutex_lock(&list->lock);
mutex_unlock(&list->lock);
amdgpu_bo_list_free(list);
}
mutex_unlock(&fpriv->bo_list_lock); mutex_unlock(&fpriv->bo_list_lock);
if (list)
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
} }
static int amdgpu_bo_list_set(struct amdgpu_device *adev, static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
{ {
struct amdgpu_bo_list *result; struct amdgpu_bo_list *result;
mutex_lock(&fpriv->bo_list_lock); rcu_read_lock();
result = idr_find(&fpriv->bo_list_handles, id); result = idr_find(&fpriv->bo_list_handles, id);
if (result)
mutex_lock(&result->lock); if (result) {
mutex_unlock(&fpriv->bo_list_lock); if (kref_get_unless_zero(&result->refcount))
mutex_lock(&result->lock);
else
result = NULL;
}
rcu_read_unlock();
return result; return result;
} }
@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
void amdgpu_bo_list_put(struct amdgpu_bo_list *list) void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{ {
mutex_unlock(&list->lock); mutex_unlock(&list->lock);
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
} }
void amdgpu_bo_list_free(struct amdgpu_bo_list *list) void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
switch (args->in.operation) { switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE: case AMDGPU_BO_LIST_OP_CREATE:
r = amdgpu_bo_list_create(fpriv, &list, &handle); r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
&handle);
if (r) if (r)
goto error_free; goto error_free;
r = amdgpu_bo_list_set(adev, filp, list, info,
args->in.bo_number);
amdgpu_bo_list_put(list);
if (r)
goto error_free;
break; break;
case AMDGPU_BO_LIST_OP_DESTROY: case AMDGPU_BO_LIST_OP_DESTROY:

View File

@ -27,6 +27,7 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <drm/drmP.h> #include <drm/drmP.h>
#include <drm/amdgpu_drm.h> #include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_trace.h" #include "amdgpu_trace.h"
@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break; break;
case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break; break;
default: default:
@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket, ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated); &parser->validated);
} }
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
drm_syncobj_put(parser->post_dep_syncobjs[i]);
kfree(parser->post_dep_syncobjs);
dma_fence_put(parser->fence); dma_fence_put(parser->fence);
if (parser->ctx) if (parser->ctx)
@ -923,65 +931,150 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0; return 0;
} }
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned num_deps;
int i, r;
struct drm_amdgpu_cs_chunk_dep *deps;
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
deps[i].ip_type,
deps[i].ip_instance,
deps[i].ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
fence = amdgpu_ctx_get_fence(ctx, ring,
deps[i].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
} else if (fence) {
r = amdgpu_sync_fence(p->adev, &p->job->sync,
fence);
dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
}
return 0;
}
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
uint32_t handle)
{
int r;
struct dma_fence *fence;
r = drm_syncobj_fence_get(p->filp, handle, &fence);
if (r)
return r;
r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
dma_fence_put(fence);
return r;
}
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
unsigned num_deps;
int i, r;
struct drm_amdgpu_cs_chunk_sem *deps;
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
for (i = 0; i < num_deps; ++i) {
r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
if (r)
return r;
}
return 0;
}
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
struct amdgpu_cs_chunk *chunk)
{
unsigned num_deps;
int i;
struct drm_amdgpu_cs_chunk_sem *deps;
deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
p->post_dep_syncobjs = kmalloc_array(num_deps,
sizeof(struct drm_syncobj *),
GFP_KERNEL);
p->num_post_dep_syncobjs = 0;
for (i = 0; i < num_deps; ++i) {
p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
if (!p->post_dep_syncobjs[i])
return -EINVAL;
p->num_post_dep_syncobjs++;
}
return 0;
}
static int amdgpu_cs_dependencies(struct amdgpu_device *adev, static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p) struct amdgpu_cs_parser *p)
{ {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv; int i, r;
int i, j, r;
for (i = 0; i < p->nchunks; ++i) { for (i = 0; i < p->nchunks; ++i) {
struct drm_amdgpu_cs_chunk_dep *deps;
struct amdgpu_cs_chunk *chunk; struct amdgpu_cs_chunk *chunk;
unsigned num_deps;
chunk = &p->chunks[i]; chunk = &p->chunks[i];
if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
continue; r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
if (ctx == NULL)
return -EINVAL;
r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
deps[j].ip_type,
deps[j].ip_instance,
deps[j].ring, &ring);
if (r) {
amdgpu_ctx_put(ctx);
return r; return r;
} } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
fence = amdgpu_ctx_get_fence(ctx, ring, if (r)
deps[j].handle); return r;
if (IS_ERR(fence)) { } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
r = PTR_ERR(fence); r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
amdgpu_ctx_put(ctx); if (r)
return r; return r;
} else if (fence) {
r = amdgpu_sync_fence(adev, &p->job->sync,
fence);
dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
return r;
}
} }
} }
return 0; return 0;
} }
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
p->fence);
}
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs) union drm_amdgpu_cs *cs)
{ {
@ -1002,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->owner = p->filp; job->owner = p->filp;
job->fence_ctx = entity->fence_context; job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished); p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_cs_post_dependencies(p);
cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
job->uf_sequence = cs->out.handle; job->uf_sequence = cs->out.handle;
amdgpu_job_free_resources(job); amdgpu_job_free_resources(job);
@ -1009,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
trace_amdgpu_cs_ioctl(job); trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base); amd_sched_entity_push_job(&job->base);
return 0; return 0;
} }

View File

@ -1073,6 +1073,10 @@ def_value:
static void amdgpu_check_vm_size(struct amdgpu_device *adev) static void amdgpu_check_vm_size(struct amdgpu_device *adev)
{ {
/* no need to check the default value */
if (amdgpu_vm_size == -1)
return;
if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
amdgpu_vm_size); amdgpu_vm_size);

View File

@ -782,7 +782,7 @@ static struct drm_driver kms_driver = {
.driver_features = .driver_features =
DRIVER_USE_AGP | DRIVER_USE_AGP |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms, .load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms, .open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms, .postclose = amdgpu_driver_postclose_kms,

View File

@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_IB 0x01
#define AMDGPU_CHUNK_ID_FENCE 0x02 #define AMDGPU_CHUNK_ID_FENCE 0x02
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
struct drm_amdgpu_cs_chunk { struct drm_amdgpu_cs_chunk {
__u32 chunk_id; __u32 chunk_id;
@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence {
__u32 offset; __u32 offset;
}; };
struct drm_amdgpu_cs_chunk_sem {
__u32 handle;
};
struct drm_amdgpu_cs_chunk_data { struct drm_amdgpu_cs_chunk_data {
union { union {
struct drm_amdgpu_cs_chunk_ib ib_data; struct drm_amdgpu_cs_chunk_ib ib_data;