drm/imagination: Implement job submission and scheduling

Implement job submission ioctl. Job scheduling is implemented using
drm_sched.

Jobs are submitted in a stream format. This is intended to allow the UAPI
data format to be independent of the actual FWIF structures in use, which
vary depending on the GPU in use.

The stream formats are documented at:
f8d2b42ae6/src/imagination/csbgen/rogue_kmd_stream.xml
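
For illustration only (not part of this patch), a rough userspace sketch of
driving the submit ioctl with packed command streams. The drm_pvr_job and
drm_pvr_ioctl_submit_jobs_args field names follow the UAPI used by this
series; the DRM_IOCTL_PVR_SUBMIT_JOBS macro and the jobs array header layout
(stride/count/array) are assumptions, and sync operations are omitted.

  #include <stdint.h>
  #include <string.h>
  #include <xf86drm.h>
  #include <drm/pvr_drm.h>

  /* Hypothetical helper: queue one geometry + one fragment job on the same
   * render context and HWRT set. With a geom -> frag syncobj dependency
   * (omitted here) the kernel can pair the two jobs, so a partial render
   * always has a fragment command to fall back on.
   */
  static int submit_geom_frag(int fd, uint32_t ctx_handle, uint32_t hwrt_set,
                              const void *geom_stream, uint32_t geom_len,
                              const void *frag_stream, uint32_t frag_len)
  {
          struct drm_pvr_job jobs[2];
          struct drm_pvr_ioctl_submit_jobs_args args;

          memset(jobs, 0, sizeof(jobs));
          memset(&args, 0, sizeof(args));

          /* cmd_stream carries the GPU-independent stream documented in
           * rogue_kmd_stream.xml; the kernel translates it into the FWIF
           * command for the GPU in use.
           */
          jobs[0].type = DRM_PVR_JOB_TYPE_GEOMETRY;
          jobs[0].context_handle = ctx_handle;
          jobs[0].cmd_stream = (uintptr_t)geom_stream;
          jobs[0].cmd_stream_len = geom_len;
          jobs[0].hwrt.set_handle = hwrt_set;

          jobs[1].type = DRM_PVR_JOB_TYPE_FRAGMENT;
          jobs[1].context_handle = ctx_handle;
          jobs[1].cmd_stream = (uintptr_t)frag_stream;
          jobs[1].cmd_stream_len = frag_len;
          jobs[1].hwrt.set_handle = hwrt_set;

          args.jobs.stride = sizeof(jobs[0]); /* assumed array header layout */
          args.jobs.count = 2;
          args.jobs.array = (uintptr_t)jobs;

          return drmIoctl(fd, DRM_IOCTL_PVR_SUBMIT_JOBS, &args);
  }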

Changes since v8:
- Updated for upstreamed DRM scheduler changes
- Removed workaround code for the pending_list previously being updated
  after run_job() returned
- Fixed null deref in pvr_queue_cleanup_fw_context() for bad stream ptr
  given to create_context ioctl
- Corrected license identifiers

Changes since v7:
- Updated for v8 "DRM scheduler changes for XE" patchset

Changes since v6:
- Fix fence handling in pvr_sync_signal_array_add()
- Add handling for SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE flag
- Fix missing dma_resv locking in job submit path

Changes since v5:
- Fix leak in job creation error path

Changes since v4:
- Use a regular workqueue for job scheduling

Changes since v3:
- Support partial render jobs
- Add job timeout handler
- Split sync handling out of job code
- Use drm_dev_{enter,exit}

Changes since v2:
- Use drm_sched for job scheduling

Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/c98dab7a5f5fb891fbed7e4990d19b5d13964365.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Sarah Walker 2023-11-22 16:34:38 +00:00 committed by Maxime Ripard
parent d2d79d29bb
commit eaf01ee5ba
15 changed files with 3438 additions and 4 deletions

drivers/gpu/drm/imagination/Kconfig

@ -6,6 +6,7 @@ config DRM_POWERVR
depends on ARM64
depends on DRM
depends on PM
select DRM_EXEC
select DRM_GEM_SHMEM_HELPER
select DRM_SCHED
select DRM_GPUVM

drivers/gpu/drm/imagination/Makefile

@ -18,10 +18,13 @@ powervr-y := \
pvr_fw_trace.o \
pvr_gem.o \
pvr_hwrt.o \
pvr_job.o \
pvr_mmu.o \
pvr_power.o \
pvr_queue.o \
pvr_stream.o \
pvr_stream_defs.o \
pvr_sync.o \
pvr_vm.o \
pvr_vm_mips.o

drivers/gpu/drm/imagination/pvr_context.c

@ -6,10 +6,12 @@
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_gem.h"
#include "pvr_job.h"
#include "pvr_power.h"
#include "pvr_rogue_fwif.h"
#include "pvr_rogue_fwif_common.h"
#include "pvr_rogue_fwif_resetframework.h"
#include "pvr_stream.h"
#include "pvr_stream_defs.h"
#include "pvr_vm.h"
@ -164,6 +166,116 @@ ctx_fw_data_init(void *cpu_ptr, void *priv)
memcpy(cpu_ptr, ctx->data, ctx->data_size);
}
/**
* pvr_context_destroy_queues() - Destroy all queues attached to a context.
* @ctx: Context to destroy queues on.
*
* Should be called when the last reference to a context object is dropped.
* It releases all resources attached to the queues bound to this context.
*/
static void pvr_context_destroy_queues(struct pvr_context *ctx)
{
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
pvr_queue_destroy(ctx->queues.fragment);
pvr_queue_destroy(ctx->queues.geometry);
break;
case DRM_PVR_CTX_TYPE_COMPUTE:
pvr_queue_destroy(ctx->queues.compute);
break;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
pvr_queue_destroy(ctx->queues.transfer);
break;
}
}
/**
* pvr_context_create_queues() - Create all queues attached to a context.
* @ctx: Context to create queues on.
* @args: Context creation arguments passed by userspace.
* @fw_ctx_map: CPU mapping of the FW context object.
*
* Return:
* * 0 on success, or
* * A negative error code otherwise.
*/
static int pvr_context_create_queues(struct pvr_context *ctx,
struct drm_pvr_ioctl_create_context_args *args,
void *fw_ctx_map)
{
int err;
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
ctx->queues.geometry = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_GEOMETRY,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.geometry)) {
err = PTR_ERR(ctx->queues.geometry);
ctx->queues.geometry = NULL;
goto err_destroy_queues;
}
ctx->queues.fragment = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_FRAGMENT,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.fragment)) {
err = PTR_ERR(ctx->queues.fragment);
ctx->queues.fragment = NULL;
goto err_destroy_queues;
}
return 0;
case DRM_PVR_CTX_TYPE_COMPUTE:
ctx->queues.compute = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_COMPUTE,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.compute)) {
err = PTR_ERR(ctx->queues.compute);
ctx->queues.compute = NULL;
goto err_destroy_queues;
}
return 0;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
ctx->queues.transfer = pvr_queue_create(ctx, DRM_PVR_JOB_TYPE_TRANSFER_FRAG,
args, fw_ctx_map);
if (IS_ERR(ctx->queues.transfer)) {
err = PTR_ERR(ctx->queues.transfer);
ctx->queues.transfer = NULL;
goto err_destroy_queues;
}
return 0;
}
return -EINVAL;
err_destroy_queues:
pvr_context_destroy_queues(ctx);
return err;
}
/**
* pvr_context_kill_queues() - Kill queues attached to context.
* @ctx: Context to kill queues on.
*
* Killing the queues implies making them unusable for future jobs, while still
* giving the currently submitted jobs a chance to finish. Queue resources will
* stay around until pvr_context_destroy_queues() is called.
*/
static void pvr_context_kill_queues(struct pvr_context *ctx)
{
switch (ctx->type) {
case DRM_PVR_CTX_TYPE_RENDER:
pvr_queue_kill(ctx->queues.fragment);
pvr_queue_kill(ctx->queues.geometry);
break;
case DRM_PVR_CTX_TYPE_COMPUTE:
pvr_queue_kill(ctx->queues.compute);
break;
case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
pvr_queue_kill(ctx->queues.transfer);
break;
}
}
/**
* pvr_context_create() - Create a context.
* @pvr_file: File to attach the created context to.
@ -214,10 +326,14 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
goto err_put_vm;
}
err = init_fw_objs(ctx, args, ctx->data);
err = pvr_context_create_queues(ctx, args, ctx->data);
if (err)
goto err_free_ctx_data;
err = init_fw_objs(ctx, args, ctx->data);
if (err)
goto err_destroy_queues;
err = pvr_fw_object_create(pvr_dev, ctx_size, PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
ctx_fw_data_init, ctx, &ctx->fw_obj);
if (err)
@ -243,6 +359,9 @@ int pvr_context_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_co
err_destroy_fw_obj:
pvr_fw_object_destroy(ctx->fw_obj);
err_destroy_queues:
pvr_context_destroy_queues(ctx);
err_free_ctx_data:
kfree(ctx->data);
@ -262,6 +381,7 @@ pvr_context_release(struct kref *ref_count)
struct pvr_device *pvr_dev = ctx->pvr_dev;
xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
pvr_context_destroy_queues(ctx);
pvr_fw_object_destroy(ctx->fw_obj);
kfree(ctx->data);
pvr_vm_context_put(ctx->vm_ctx);
@ -299,6 +419,9 @@ pvr_context_destroy(struct pvr_file *pvr_file, u32 handle)
if (!ctx)
return -EINVAL;
/* Make sure nothing can be queued to the queues after that point. */
pvr_context_kill_queues(ctx);
/* Release the reference held by the handle set. */
pvr_context_put(ctx);

drivers/gpu/drm/imagination/pvr_context.h

@ -15,6 +15,7 @@
#include "pvr_cccb.h"
#include "pvr_device.h"
#include "pvr_queue.h"
/* Forward declaration from pvr_gem.h. */
struct pvr_fw_object;
@ -58,8 +59,51 @@ struct pvr_context {
/** @ctx_id: FW context ID. */
u32 ctx_id;
/**
* @faulty: Set to 1 when the context queues had unfinished jobs when
* a GPU reset happened.
*
* In that case, the context is in an inconsistent state and can't be
* used anymore.
*/
atomic_t faulty;
/** @queues: Union containing all kind of queues. */
union {
struct {
/** @geometry: Geometry queue. */
struct pvr_queue *geometry;
/** @fragment: Fragment queue. */
struct pvr_queue *fragment;
};
/** @compute: Compute queue. */
struct pvr_queue *compute;
/** @transfer: Transfer queue. */
struct pvr_queue *transfer;
} queues;
};
static __always_inline struct pvr_queue *
pvr_context_get_queue_for_job(struct pvr_context *ctx, enum drm_pvr_job_type type)
{
switch (type) {
case DRM_PVR_JOB_TYPE_GEOMETRY:
return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.geometry : NULL;
case DRM_PVR_JOB_TYPE_FRAGMENT:
return ctx->type == DRM_PVR_CTX_TYPE_RENDER ? ctx->queues.fragment : NULL;
case DRM_PVR_JOB_TYPE_COMPUTE:
return ctx->type == DRM_PVR_CTX_TYPE_COMPUTE ? ctx->queues.compute : NULL;
case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
return ctx->type == DRM_PVR_CTX_TYPE_TRANSFER_FRAG ? ctx->queues.transfer : NULL;
}
return NULL;
}
/**
* pvr_context_get() - Take additional reference on context.
* @ctx: Context pointer.

drivers/gpu/drm/imagination/pvr_device.c

@ -6,7 +6,9 @@
#include "pvr_fw.h"
#include "pvr_power.h"
#include "pvr_queue.h"
#include "pvr_rogue_cr_defs.h"
#include "pvr_stream.h"
#include "pvr_vm.h"
#include <drm/drm_print.h>
@ -117,6 +119,32 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev)
return 0;
}
/**
* pvr_device_process_active_queues() - Process all queue related events.
* @pvr_dev: PowerVR device to check
*
* This is called any time we receive a FW event. It iterates over all
* active queues and calls pvr_queue_process() on them.
*/
void pvr_device_process_active_queues(struct pvr_device *pvr_dev)
{
struct pvr_queue *queue, *tmp_queue;
LIST_HEAD(active_queues);
mutex_lock(&pvr_dev->queues.lock);
/* Move all active queues to a temporary list. Queues that remain
* active after we're done processing them are re-inserted to
* the queues.active list by pvr_queue_process().
*/
list_splice_init(&pvr_dev->queues.active, &active_queues);
list_for_each_entry_safe(queue, tmp_queue, &active_queues, node)
pvr_queue_process(queue);
mutex_unlock(&pvr_dev->queues.lock);
}
static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
{
struct pvr_device *pvr_dev = data;
@ -132,6 +160,7 @@ static irqreturn_t pvr_device_irq_thread_handler(int irq, void *data)
if (pvr_dev->fw_dev.booted) {
pvr_fwccb_process(pvr_dev);
pvr_kccb_wake_up_waiters(pvr_dev);
pvr_device_process_active_queues(pvr_dev);
}
pm_runtime_mark_last_busy(from_pvr_device(pvr_dev)->dev);
@ -398,6 +427,8 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
else
return -EINVAL;
pvr_stream_create_musthave_masks(pvr_dev);
err = pvr_set_dma_info(pvr_dev);
if (err)
return err;

drivers/gpu/drm/imagination/pvr_device.h

@ -173,6 +173,26 @@ struct pvr_device {
*/
struct xarray free_list_ids;
/**
* @job_ids: Array of jobs belonging to this device. Array members
* are of type "struct pvr_job *".
*/
struct xarray job_ids;
/**
* @queues: Queue-related fields.
*/
struct {
/** @active: Active queue list. */
struct list_head active;
/** @idle: Idle queue list. */
struct list_head idle;
/** @lock: Lock protecting access to the active/idle lists. */
struct mutex lock;
} queues;
struct {
/** @work: Work item for watchdog callback. */
struct delayed_work work;
@ -442,6 +462,7 @@ packed_bvnc_to_pvr_gpu_id(u64 bvnc, struct pvr_gpu_id *gpu_id)
int pvr_device_init(struct pvr_device *pvr_dev);
void pvr_device_fini(struct pvr_device *pvr_dev);
void pvr_device_reset(struct pvr_device *pvr_dev);
bool
pvr_device_has_uapi_quirk(struct pvr_device *pvr_dev, u32 quirk);

drivers/gpu/drm/imagination/pvr_drv.c

@ -7,6 +7,7 @@
#include "pvr_free_list.h"
#include "pvr_gem.h"
#include "pvr_hwrt.h"
#include "pvr_job.h"
#include "pvr_mmu.h"
#include "pvr_power.h"
#include "pvr_rogue_defs.h"
@ -32,6 +33,8 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/xarray.h>
/**
* DOC: PowerVR (Series 6 and later) and IMG Graphics Driver
@ -397,7 +400,8 @@ pvr_dev_query_runtime_info_get(struct pvr_device *pvr_dev,
return 0;
}
runtime_info.free_list_min_pages = 0; /* FIXME */
runtime_info.free_list_min_pages =
pvr_get_free_list_min_pages(pvr_dev);
runtime_info.free_list_max_pages =
ROGUE_PM_MAX_FREELIST_SIZE / ROGUE_PM_PAGE_SIZE;
runtime_info.common_store_alloc_region_size =
@ -1137,7 +1141,20 @@ static int
pvr_ioctl_submit_jobs(struct drm_device *drm_dev, void *raw_args,
struct drm_file *file)
{
return -ENOTTY;
struct drm_pvr_ioctl_submit_jobs_args *args = raw_args;
struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
struct pvr_file *pvr_file = to_pvr_file(file);
int idx;
int err;
if (!drm_dev_enter(drm_dev, &idx))
return -EIO;
err = pvr_submit_jobs(pvr_dev, pvr_file, args);
drm_dev_exit(idx);
return err;
}
int
@ -1353,7 +1370,8 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
static struct drm_driver pvr_drm_driver = {
.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER,
.driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER |
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE,
.open = pvr_drm_driver_open,
.postclose = pvr_drm_driver_postclose,
.ioctls = pvr_drm_driver_ioctls,
@ -1386,8 +1404,15 @@ pvr_probe(struct platform_device *plat_dev)
drm_dev = &pvr_dev->base;
platform_set_drvdata(plat_dev, drm_dev);
init_rwsem(&pvr_dev->reset_sem);
pvr_context_device_init(pvr_dev);
err = pvr_queue_device_init(pvr_dev);
if (err)
goto err_context_fini;
devm_pm_runtime_enable(&plat_dev->dev);
pm_runtime_mark_last_busy(&plat_dev->dev);
@ -1404,6 +1429,7 @@ pvr_probe(struct platform_device *plat_dev)
goto err_device_fini;
xa_init_flags(&pvr_dev->free_list_ids, XA_FLAGS_ALLOC1);
xa_init_flags(&pvr_dev->job_ids, XA_FLAGS_ALLOC1);
return 0;
@ -1413,6 +1439,11 @@ err_device_fini:
err_watchdog_fini:
pvr_watchdog_fini(pvr_dev);
pvr_queue_device_fini(pvr_dev);
err_context_fini:
pvr_context_device_fini(pvr_dev);
return err;
}
@ -1422,14 +1453,17 @@ pvr_remove(struct platform_device *plat_dev)
struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
WARN_ON(!xa_empty(&pvr_dev->job_ids));
WARN_ON(!xa_empty(&pvr_dev->free_list_ids));
xa_destroy(&pvr_dev->job_ids);
xa_destroy(&pvr_dev->free_list_ids);
pm_runtime_suspend(drm_dev->dev);
pvr_device_fini(pvr_dev);
drm_dev_unplug(drm_dev);
pvr_watchdog_fini(pvr_dev);
pvr_queue_device_fini(pvr_dev);
pvr_context_device_fini(pvr_dev);
return 0;

drivers/gpu/drm/imagination/pvr_job.c

@ -0,0 +1,788 @@
// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#include "pvr_context.h"
#include "pvr_device.h"
#include "pvr_drv.h"
#include "pvr_gem.h"
#include "pvr_hwrt.h"
#include "pvr_job.h"
#include "pvr_mmu.h"
#include "pvr_power.h"
#include "pvr_rogue_fwif.h"
#include "pvr_rogue_fwif_client.h"
#include "pvr_stream.h"
#include "pvr_stream_defs.h"
#include "pvr_sync.h"
#include <drm/drm_exec.h>
#include <drm/drm_gem.h>
#include <linux/types.h>
#include <uapi/drm/pvr_drm.h>
static void pvr_job_release(struct kref *kref)
{
struct pvr_job *job = container_of(kref, struct pvr_job, ref_count);
xa_erase(&job->pvr_dev->job_ids, job->id);
pvr_hwrt_data_put(job->hwrt);
pvr_context_put(job->ctx);
WARN_ON(job->paired_job);
pvr_queue_job_cleanup(job);
pvr_job_release_pm_ref(job);
kfree(job->cmd);
kfree(job);
}
/**
* pvr_job_put() - Release reference on job
* @job: Target job.
*/
void
pvr_job_put(struct pvr_job *job)
{
if (job)
kref_put(&job->ref_count, pvr_job_release);
}
/**
* pvr_job_process_stream() - Build job FW structure from stream
* @pvr_dev: Device pointer.
* @cmd_defs: Stream definition.
* @stream: Pointer to command stream.
* @stream_size: Size of command stream, in bytes.
* @job: Pointer to job.
*
* Caller is responsible for freeing the output structure.
*
* Returns:
* * 0 on success,
* * -%ENOMEM on out of memory, or
* * -%EINVAL on malformed stream.
*/
static int
pvr_job_process_stream(struct pvr_device *pvr_dev, const struct pvr_stream_cmd_defs *cmd_defs,
void *stream, u32 stream_size, struct pvr_job *job)
{
int err;
job->cmd = kzalloc(cmd_defs->dest_size, GFP_KERNEL);
if (!job->cmd)
return -ENOMEM;
job->cmd_len = cmd_defs->dest_size;
err = pvr_stream_process(pvr_dev, cmd_defs, stream, stream_size, job->cmd);
if (err)
kfree(job->cmd);
return err;
}
static int pvr_fw_cmd_init(struct pvr_device *pvr_dev, struct pvr_job *job,
const struct pvr_stream_cmd_defs *stream_def,
u64 stream_userptr, u32 stream_len)
{
void *stream;
int err;
stream = kzalloc(stream_len, GFP_KERNEL);
if (!stream)
return -ENOMEM;
if (copy_from_user(stream, u64_to_user_ptr(stream_userptr), stream_len)) {
err = -EFAULT;
goto err_free_stream;
}
err = pvr_job_process_stream(pvr_dev, stream_def, stream, stream_len, job);
err_free_stream:
kfree(stream);
return err;
}
static u32
convert_geom_flags(u32 in_flags)
{
u32 out_flags = 0;
if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST)
out_flags |= ROGUE_GEOM_FLAGS_FIRSTKICK;
if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST)
out_flags |= ROGUE_GEOM_FLAGS_LASTKICK;
if (in_flags & DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE)
out_flags |= ROGUE_GEOM_FLAGS_SINGLE_CORE;
return out_flags;
}
static u32
convert_frag_flags(u32 in_flags)
{
u32 out_flags = 0;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE)
out_flags |= ROGUE_FRAG_FLAGS_SINGLE_CORE;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER)
out_flags |= ROGUE_FRAG_FLAGS_DEPTHBUFFER;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER)
out_flags |= ROGUE_FRAG_FLAGS_STENCILBUFFER;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP)
out_flags |= ROGUE_FRAG_FLAGS_PREVENT_CDM_OVERLAP;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER)
out_flags |= ROGUE_FRAG_FLAGS_SCRATCHBUFFER;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS)
out_flags |= ROGUE_FRAG_FLAGS_GET_VIS_RESULTS;
if (in_flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE)
out_flags |= ROGUE_FRAG_FLAGS_DISABLE_PIXELMERGE;
return out_flags;
}
static int
pvr_geom_job_fw_cmd_init(struct pvr_job *job,
struct drm_pvr_job *args)
{
struct rogue_fwif_cmd_geom *cmd;
int err;
if (args->flags & ~DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK)
return -EINVAL;
if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
return -EINVAL;
if (!job->hwrt)
return -EINVAL;
job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_GEOM;
err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_geom_stream,
args->cmd_stream, args->cmd_stream_len);
if (err)
return err;
cmd = job->cmd;
cmd->cmd_shared.cmn.frame_num = 0;
cmd->flags = convert_geom_flags(args->flags);
pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
return 0;
}
static int
pvr_frag_job_fw_cmd_init(struct pvr_job *job,
struct drm_pvr_job *args)
{
struct rogue_fwif_cmd_frag *cmd;
int err;
if (args->flags & ~DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK)
return -EINVAL;
if (job->ctx->type != DRM_PVR_CTX_TYPE_RENDER)
return -EINVAL;
if (!job->hwrt)
return -EINVAL;
job->fw_ccb_cmd_type = (args->flags & DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER) ?
ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR :
ROGUE_FWIF_CCB_CMD_TYPE_FRAG;
err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_frag_stream,
args->cmd_stream, args->cmd_stream_len);
if (err)
return err;
cmd = job->cmd;
cmd->cmd_shared.cmn.frame_num = 0;
cmd->flags = convert_frag_flags(args->flags);
pvr_fw_object_get_fw_addr(job->hwrt->fw_obj, &cmd->cmd_shared.hwrt_data_fw_addr);
return 0;
}
static u32
convert_compute_flags(u32 in_flags)
{
u32 out_flags = 0;
if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP)
out_flags |= ROGUE_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
if (in_flags & DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE)
out_flags |= ROGUE_COMPUTE_FLAG_SINGLE_CORE;
return out_flags;
}
static int
pvr_compute_job_fw_cmd_init(struct pvr_job *job,
struct drm_pvr_job *args)
{
struct rogue_fwif_cmd_compute *cmd;
int err;
if (args->flags & ~DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK)
return -EINVAL;
if (job->ctx->type != DRM_PVR_CTX_TYPE_COMPUTE)
return -EINVAL;
job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_CDM;
err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_compute_stream,
args->cmd_stream, args->cmd_stream_len);
if (err)
return err;
cmd = job->cmd;
cmd->common.frame_num = 0;
cmd->flags = convert_compute_flags(args->flags);
return 0;
}
static u32
convert_transfer_flags(u32 in_flags)
{
u32 out_flags = 0;
if (in_flags & DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE)
out_flags |= ROGUE_TRANSFER_FLAGS_SINGLE_CORE;
return out_flags;
}
static int
pvr_transfer_job_fw_cmd_init(struct pvr_job *job,
struct drm_pvr_job *args)
{
struct rogue_fwif_cmd_transfer *cmd;
int err;
if (args->flags & ~DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK)
return -EINVAL;
if (job->ctx->type != DRM_PVR_CTX_TYPE_TRANSFER_FRAG)
return -EINVAL;
job->fw_ccb_cmd_type = ROGUE_FWIF_CCB_CMD_TYPE_TQ_3D;
err = pvr_fw_cmd_init(job->pvr_dev, job, &pvr_cmd_transfer_stream,
args->cmd_stream, args->cmd_stream_len);
if (err)
return err;
cmd = job->cmd;
cmd->common.frame_num = 0;
cmd->flags = convert_transfer_flags(args->flags);
return 0;
}
static int
pvr_job_fw_cmd_init(struct pvr_job *job,
struct drm_pvr_job *args)
{
switch (args->type) {
case DRM_PVR_JOB_TYPE_GEOMETRY:
return pvr_geom_job_fw_cmd_init(job, args);
case DRM_PVR_JOB_TYPE_FRAGMENT:
return pvr_frag_job_fw_cmd_init(job, args);
case DRM_PVR_JOB_TYPE_COMPUTE:
return pvr_compute_job_fw_cmd_init(job, args);
case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
return pvr_transfer_job_fw_cmd_init(job, args);
default:
return -EINVAL;
}
}
/**
* struct pvr_job_data - Helper container for pairing jobs with the
* sync_ops supplied for them by the user.
*/
struct pvr_job_data {
/** @job: Pointer to the job. */
struct pvr_job *job;
/** @sync_ops: Pointer to the sync_ops associated with @job. */
struct drm_pvr_sync_op *sync_ops;
/** @sync_op_count: Number of members of @sync_ops. */
u32 sync_op_count;
};
/**
* prepare_job_syncs() - Prepare all sync objects for a single job.
* @pvr_file: PowerVR file.
* @job_data: Precreated job and sync_ops array.
* @signal_array: xarray to receive signal sync objects.
*
* Returns:
* * 0 on success, or
* * Any error code returned by pvr_sync_signal_array_collect_ops(),
* pvr_sync_add_deps_to_job(), drm_sched_job_add_resv_dependencies() or
* pvr_sync_signal_array_update_fences().
*/
static int
prepare_job_syncs(struct pvr_file *pvr_file,
struct pvr_job_data *job_data,
struct xarray *signal_array)
{
struct dma_fence *done_fence;
int err = pvr_sync_signal_array_collect_ops(signal_array,
from_pvr_file(pvr_file),
job_data->sync_op_count,
job_data->sync_ops);
if (err)
return err;
err = pvr_sync_add_deps_to_job(pvr_file, &job_data->job->base,
job_data->sync_op_count,
job_data->sync_ops, signal_array);
if (err)
return err;
if (job_data->job->hwrt) {
/* The geometry job writes the HWRT region headers, which are
* then read by the fragment job.
*/
struct drm_gem_object *obj =
gem_from_pvr_gem(job_data->job->hwrt->fw_obj->gem);
enum dma_resv_usage usage =
dma_resv_usage_rw(job_data->job->type ==
DRM_PVR_JOB_TYPE_GEOMETRY);
dma_resv_lock(obj->resv, NULL);
err = drm_sched_job_add_resv_dependencies(&job_data->job->base,
obj->resv, usage);
dma_resv_unlock(obj->resv);
if (err)
return err;
}
/* We need to arm the job to get the job done fence. */
done_fence = pvr_queue_job_arm(job_data->job);
err = pvr_sync_signal_array_update_fences(signal_array,
job_data->sync_op_count,
job_data->sync_ops,
done_fence);
return err;
}
/**
* prepare_job_syncs_for_each() - Prepare all sync objects for an array of jobs.
* @pvr_file: PowerVR file.
* @job_data: Array of precreated jobs and their sync_ops.
* @job_count: Number of jobs.
* @signal_array: xarray to receive signal sync objects.
*
* Returns:
* * 0 on success, or
* * Any error code returned by prepare_job_syncs().
*/
static int
prepare_job_syncs_for_each(struct pvr_file *pvr_file,
struct pvr_job_data *job_data,
u32 *job_count,
struct xarray *signal_array)
{
for (u32 i = 0; i < *job_count; i++) {
int err = prepare_job_syncs(pvr_file, &job_data[i],
signal_array);
if (err) {
*job_count = i;
return err;
}
}
return 0;
}
static struct pvr_job *
create_job(struct pvr_device *pvr_dev,
struct pvr_file *pvr_file,
struct drm_pvr_job *args)
{
struct pvr_job *job = NULL;
int err;
if (!args->cmd_stream || !args->cmd_stream_len)
return ERR_PTR(-EINVAL);
if (args->type != DRM_PVR_JOB_TYPE_GEOMETRY &&
args->type != DRM_PVR_JOB_TYPE_FRAGMENT &&
(args->hwrt.set_handle || args->hwrt.data_index))
return ERR_PTR(-EINVAL);
job = kzalloc(sizeof(*job), GFP_KERNEL);
if (!job)
return ERR_PTR(-ENOMEM);
kref_init(&job->ref_count);
job->type = args->type;
job->pvr_dev = pvr_dev;
err = xa_alloc(&pvr_dev->job_ids, &job->id, job, xa_limit_32b, GFP_KERNEL);
if (err)
goto err_put_job;
job->ctx = pvr_context_lookup(pvr_file, args->context_handle);
if (!job->ctx) {
err = -EINVAL;
goto err_put_job;
}
if (args->hwrt.set_handle) {
job->hwrt = pvr_hwrt_data_lookup(pvr_file, args->hwrt.set_handle,
args->hwrt.data_index);
if (!job->hwrt) {
err = -EINVAL;
goto err_put_job;
}
}
err = pvr_job_fw_cmd_init(job, args);
if (err)
goto err_put_job;
err = pvr_queue_job_init(job);
if (err)
goto err_put_job;
return job;
err_put_job:
pvr_job_put(job);
return ERR_PTR(err);
}
/**
* pvr_job_data_fini() - Cleanup all allocs used to set up job submission.
* @job_data: Job data array.
* @job_count: Number of members of @job_data.
*/
static void
pvr_job_data_fini(struct pvr_job_data *job_data, u32 job_count)
{
for (u32 i = 0; i < job_count; i++) {
pvr_job_put(job_data[i].job);
kvfree(job_data[i].sync_ops);
}
}
/**
* pvr_job_data_init() - Init an array of created jobs, associating them with
* the appropriate sync_ops args, which will be copied in.
* @pvr_dev: Target PowerVR device.
* @pvr_file: Pointer to PowerVR file structure.
* @job_args: Job args array copied from user.
* @job_count: Number of members of @job_args.
* @job_data_out: Job data array.
*/
static int pvr_job_data_init(struct pvr_device *pvr_dev,
struct pvr_file *pvr_file,
struct drm_pvr_job *job_args,
u32 *job_count,
struct pvr_job_data *job_data_out)
{
int err = 0, i = 0;
for (; i < *job_count; i++) {
job_data_out[i].job =
create_job(pvr_dev, pvr_file, &job_args[i]);
err = PTR_ERR_OR_ZERO(job_data_out[i].job);
if (err) {
*job_count = i;
job_data_out[i].job = NULL;
goto err_cleanup;
}
err = PVR_UOBJ_GET_ARRAY(job_data_out[i].sync_ops,
&job_args[i].sync_ops);
if (err) {
*job_count = i;
/* Ensure the job created above is also cleaned up. */
i++;
goto err_cleanup;
}
job_data_out[i].sync_op_count = job_args[i].sync_ops.count;
}
return 0;
err_cleanup:
pvr_job_data_fini(job_data_out, i);
return err;
}
static void
push_jobs(struct pvr_job_data *job_data, u32 job_count)
{
for (u32 i = 0; i < job_count; i++)
pvr_queue_job_push(job_data[i].job);
}
static int
prepare_fw_obj_resv(struct drm_exec *exec, struct pvr_fw_object *fw_obj)
{
return drm_exec_prepare_obj(exec, gem_from_pvr_gem(fw_obj->gem), 1);
}
static int
jobs_lock_all_objs(struct drm_exec *exec, struct pvr_job_data *job_data,
u32 job_count)
{
for (u32 i = 0; i < job_count; i++) {
struct pvr_job *job = job_data[i].job;
/* Grab a lock on the context, to guard against
* concurrent submission to the same queue.
*/
int err = drm_exec_lock_obj(exec,
gem_from_pvr_gem(job->ctx->fw_obj->gem));
if (err)
return err;
if (job->hwrt) {
err = prepare_fw_obj_resv(exec,
job->hwrt->fw_obj);
if (err)
return err;
}
}
return 0;
}
static int
prepare_job_resvs_for_each(struct drm_exec *exec, struct pvr_job_data *job_data,
u32 job_count)
{
drm_exec_until_all_locked(exec) {
int err = jobs_lock_all_objs(exec, job_data, job_count);
drm_exec_retry_on_contention(exec);
if (err)
return err;
}
return 0;
}
static void
update_job_resvs(struct pvr_job *job)
{
if (job->hwrt) {
enum dma_resv_usage usage = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ;
struct drm_gem_object *obj = gem_from_pvr_gem(job->hwrt->fw_obj->gem);
dma_resv_add_fence(obj->resv, &job->base.s_fence->finished, usage);
}
}
static void
update_job_resvs_for_each(struct pvr_job_data *job_data, u32 job_count)
{
for (u32 i = 0; i < job_count; i++)
update_job_resvs(job_data[i].job);
}
static bool can_combine_jobs(struct pvr_job *a, struct pvr_job *b)
{
struct pvr_job *geom_job = a, *frag_job = b;
struct dma_fence *fence;
unsigned long index;
/* Geometry and fragment jobs can be combined if they are queued to the
* same context and targeting the same HWRT.
*/
if (a->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
b->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
a->ctx != b->ctx ||
a->hwrt != b->hwrt)
return false;
xa_for_each(&frag_job->base.dependencies, index, fence) {
/* We combine when we see an explicit geom -> frag dep. */
if (&geom_job->base.s_fence->scheduled == fence)
return true;
}
return false;
}
static struct dma_fence *
get_last_queued_job_scheduled_fence(struct pvr_queue *queue,
struct pvr_job_data *job_data,
u32 cur_job_pos)
{
/* We iterate over the current job array in reverse order to grab the
* last to-be-queued job targeting the same queue.
*/
for (u32 i = cur_job_pos; i > 0; i--) {
struct pvr_job *job = job_data[i - 1].job;
if (job->ctx == queue->ctx && job->type == queue->type)
return dma_fence_get(&job->base.s_fence->scheduled);
}
/* If we didn't find any, we just return the last queued job scheduled
* fence attached to the queue.
*/
return dma_fence_get(queue->last_queued_job_scheduled_fence);
}
static int
pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count)
{
for (u32 i = 0; i < *job_count - 1; i++) {
struct pvr_job *geom_job = job_data[i].job;
struct pvr_job *frag_job = job_data[i + 1].job;
struct pvr_queue *frag_queue;
struct dma_fence *f;
if (!can_combine_jobs(job_data[i].job, job_data[i + 1].job))
continue;
/* The fragment job will be submitted by the geometry queue. We
* need to make sure it comes after all the other fragment jobs
* queued before it.
*/
frag_queue = pvr_context_get_queue_for_job(frag_job->ctx,
frag_job->type);
f = get_last_queued_job_scheduled_fence(frag_queue, job_data,
i);
if (f) {
int err = drm_sched_job_add_dependency(&geom_job->base,
f);
if (err) {
*job_count = i;
return err;
}
}
/* The KCCB slot will be reserved by the geometry job, so we can
* drop the KCCB fence on the fragment job.
*/
pvr_kccb_fence_put(frag_job->kccb_fence);
frag_job->kccb_fence = NULL;
geom_job->paired_job = frag_job;
frag_job->paired_job = geom_job;
/* Skip the fragment job we just paired to the geometry job. */
i++;
}
return 0;
}
/**
* pvr_submit_jobs() - Submit jobs to the GPU
* @pvr_dev: Target PowerVR device.
* @pvr_file: Pointer to PowerVR file structure.
* @args: Ioctl args. On error, @args->jobs.count is updated to the index of
* the job at which submission failed.
*
* Submission is asynchronous: jobs are pushed to their queues' schedulers and
* the ioctl returns without waiting for them to complete on the GPU.
*
* Returns:
* * 0 on success,
* * -%EFAULT if arguments can not be copied from user space, or
* * -%EINVAL on invalid arguments, or
* * Any other error.
*/
int
pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
struct drm_pvr_ioctl_submit_jobs_args *args)
{
struct pvr_job_data *job_data = NULL;
struct drm_pvr_job *job_args;
struct xarray signal_array;
u32 jobs_alloced = 0;
struct drm_exec exec;
int err;
if (!args->jobs.count)
return -EINVAL;
err = PVR_UOBJ_GET_ARRAY(job_args, &args->jobs);
if (err)
return err;
job_data = kvmalloc_array(args->jobs.count, sizeof(*job_data),
GFP_KERNEL | __GFP_ZERO);
if (!job_data) {
err = -ENOMEM;
goto out_free;
}
err = pvr_job_data_init(pvr_dev, pvr_file, job_args, &args->jobs.count,
job_data);
if (err)
goto out_free;
jobs_alloced = args->jobs.count;
/*
* Flush MMU if needed - this has been deferred until now to avoid
* overuse of this expensive operation.
*/
err = pvr_mmu_flush_exec(pvr_dev, false);
if (err)
goto out_job_data_cleanup;
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
xa_init_flags(&signal_array, XA_FLAGS_ALLOC);
err = prepare_job_syncs_for_each(pvr_file, job_data, &args->jobs.count,
&signal_array);
if (err)
goto out_exec_fini;
err = prepare_job_resvs_for_each(&exec, job_data, args->jobs.count);
if (err)
goto out_exec_fini;
err = pvr_jobs_link_geom_frag(job_data, &args->jobs.count);
if (err)
goto out_exec_fini;
/* Anything after that point must succeed because we start exposing job
* finished fences to the outside world.
*/
update_job_resvs_for_each(job_data, args->jobs.count);
push_jobs(job_data, args->jobs.count);
pvr_sync_signal_array_push_fences(&signal_array);
err = 0;
out_exec_fini:
drm_exec_fini(&exec);
pvr_sync_signal_array_cleanup(&signal_array);
out_job_data_cleanup:
pvr_job_data_fini(job_data, jobs_alloced);
out_free:
kvfree(job_data);
kvfree(job_args);
return err;
}

drivers/gpu/drm/imagination/pvr_job.h

@ -0,0 +1,161 @@
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_JOB_H
#define PVR_JOB_H
#include <uapi/drm/pvr_drm.h>
#include <linux/kref.h>
#include <linux/types.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
#include "pvr_power.h"
/* Forward declaration from "pvr_context.h". */
struct pvr_context;
/* Forward declarations from "pvr_device.h". */
struct pvr_device;
struct pvr_file;
/* Forward declarations from "pvr_hwrt.h". */
struct pvr_hwrt_data;
/* Forward declaration from "pvr_queue.h". */
struct pvr_queue;
struct pvr_job {
/** @base: drm_sched_job object. */
struct drm_sched_job base;
/** @ref_count: Refcount for job. */
struct kref ref_count;
/** @type: Type of job. */
enum drm_pvr_job_type type;
/** @id: Job ID number. */
u32 id;
/**
* @paired_job: Job paired to this job.
*
* This field is only meaningful for geometry and fragment jobs.
*
* Paired jobs are executed on the same context, and need to be submitted
* atomically to the FW, to make sure the partial render logic has a
* fragment job to execute when the Parameter Manager runs out of memory.
*
* The geometry job should point to the fragment job it's paired with,
* and the fragment job should point to the geometry job it's paired with.
*/
struct pvr_job *paired_job;
/** @cccb_fence: Fence used to wait for CCCB space. */
struct dma_fence *cccb_fence;
/** @kccb_fence: Fence used to wait for KCCB space. */
struct dma_fence *kccb_fence;
/** @done_fence: Fence to signal when the job is done. */
struct dma_fence *done_fence;
/** @pvr_dev: Device pointer. */
struct pvr_device *pvr_dev;
/** @ctx: Pointer to owning context. */
struct pvr_context *ctx;
/** @cmd: Command data. Format depends on @type. */
void *cmd;
/** @cmd_len: Length of command data, in bytes. */
u32 cmd_len;
/**
* @fw_ccb_cmd_type: Firmware CCB command type. Must be one of %ROGUE_FWIF_CCB_CMD_TYPE_*.
*/
u32 fw_ccb_cmd_type;
/** @hwrt: HWRT object. Will be NULL for compute and transfer jobs. */
struct pvr_hwrt_data *hwrt;
/**
* @has_pm_ref: True if the job has a power ref, thus forcing the GPU to stay on until
* the job is done.
*/
bool has_pm_ref;
};
/**
* pvr_job_get() - Take additional reference on job.
* @job: Job pointer.
*
* Call pvr_job_put() to release.
*
* Returns:
* * The requested job on success, or
* * %NULL if no job pointer passed.
*/
static __always_inline struct pvr_job *
pvr_job_get(struct pvr_job *job)
{
if (job)
kref_get(&job->ref_count);
return job;
}
void pvr_job_put(struct pvr_job *job);
/**
* pvr_job_release_pm_ref() - Release the PM ref if the job acquired it.
* @job: The job to release the PM ref on.
*/
static __always_inline void
pvr_job_release_pm_ref(struct pvr_job *job)
{
if (job->has_pm_ref) {
pvr_power_put(job->pvr_dev);
job->has_pm_ref = false;
}
}
/**
* pvr_job_get_pm_ref() - Get a PM ref and attach it to the job.
* @job: The job to attach the PM ref to.
*
* Return:
* * 0 on success, or
* * Any error returned by pvr_power_get() otherwise.
*/
static __always_inline int
pvr_job_get_pm_ref(struct pvr_job *job)
{
int err;
if (job->has_pm_ref)
return 0;
err = pvr_power_get(job->pvr_dev);
if (!err)
job->has_pm_ref = true;
return err;
}
int pvr_job_wait_first_non_signaled_native_dep(struct pvr_job *job);
bool pvr_job_non_native_deps_done(struct pvr_job *job);
int pvr_job_fits_in_cccb(struct pvr_job *job, unsigned long native_dep_count);
void pvr_job_submit(struct pvr_job *job);
int pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file,
struct drm_pvr_ioctl_submit_jobs_args *args);
#endif /* PVR_JOB_H */

drivers/gpu/drm/imagination/pvr_power.c

@ -5,6 +5,7 @@
#include "pvr_fw.h"
#include "pvr_fw_startstop.h"
#include "pvr_power.h"
#include "pvr_queue.h"
#include "pvr_rogue_fwif.h"
#include <drm/drm_drv.h>
@ -155,6 +156,21 @@ pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
pvr_dev->watchdog.kccb_stall_count = 0;
return true;
}
} else if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed) {
bool has_active_contexts;
mutex_lock(&pvr_dev->queues.lock);
has_active_contexts = list_empty(&pvr_dev->queues.active);
mutex_unlock(&pvr_dev->queues.lock);
if (has_active_contexts) {
/* Send a HEALTH_CHECK command so we can verify FW is still alive. */
struct rogue_fwif_kccb_cmd health_check_cmd;
health_check_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK;
pvr_kccb_send_cmd_powered(pvr_dev, &health_check_cmd, NULL);
}
} else {
pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
pvr_dev->watchdog.kccb_stall_count = 0;
@ -318,6 +334,7 @@ pvr_power_device_idle(struct device *dev)
int
pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
{
bool queues_disabled = false;
int err;
/*
@ -337,6 +354,11 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
disable_irq(pvr_dev->irq);
do {
if (hard_reset) {
pvr_queue_device_pre_reset(pvr_dev);
queues_disabled = true;
}
err = pvr_power_fw_disable(pvr_dev, hard_reset);
if (!err) {
if (hard_reset) {
@ -372,6 +394,9 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
}
} while (err);
if (queues_disabled)
pvr_queue_device_post_reset(pvr_dev);
enable_irq(pvr_dev->irq);
up_write(&pvr_dev->reset_sem);
@ -386,6 +411,9 @@ err_device_lost:
/* Leave IRQs disabled if the device is lost. */
if (queues_disabled)
pvr_queue_device_post_reset(pvr_dev);
err_up_write:
up_write(&pvr_dev->reset_sem);

drivers/gpu/drm/imagination/pvr_queue.c (file diff suppressed because it is too large)

drivers/gpu/drm/imagination/pvr_queue.h

@ -0,0 +1,169 @@
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_QUEUE_H
#define PVR_QUEUE_H
#include <drm/gpu_scheduler.h>
#include "pvr_cccb.h"
#include "pvr_device.h"
struct pvr_context;
struct pvr_queue;
/**
* struct pvr_queue_fence_ctx - Queue fence context
*
* Used to implement dma_fence_ops for pvr_job::{done,cccb}_fence.
*/
struct pvr_queue_fence_ctx {
/** @id: Fence context ID allocated with dma_fence_context_alloc(). */
u64 id;
/** @seqno: Sequence number incremented each time a fence is created. */
atomic_t seqno;
/** @lock: Lock used to synchronize access to fences allocated by this context. */
spinlock_t lock;
};
/**
* struct pvr_queue_cccb_fence_ctx - CCCB fence context
*
* Context used to manage fences controlling access to the CCCB. No fences are
* issued if there's enough space in the CCCB to push job commands.
*/
struct pvr_queue_cccb_fence_ctx {
/** @base: Base queue fence context. */
struct pvr_queue_fence_ctx base;
/**
* @job: Job waiting for CCCB space.
*
* Thanks to the serialization done at the drm_sched_entity level,
* there's no more than one job waiting for CCCB at a given time.
*
* This field is NULL if no jobs are currently waiting for CCCB space.
*
* Must be accessed with @job_lock held.
*/
struct pvr_job *job;
/** @lock: Lock protecting access to the job object. */
struct mutex job_lock;
};
/**
* struct pvr_queue_fence - Queue fence object
*/
struct pvr_queue_fence {
/** @base: Base dma_fence. */
struct dma_fence base;
/** @queue: Queue that created this fence. */
struct pvr_queue *queue;
};
/**
* struct pvr_queue - Job queue
*
* Used to queue and track execution of pvr_job objects.
*/
struct pvr_queue {
/** @scheduler: Single entity scheduler used to push jobs to this queue. */
struct drm_gpu_scheduler scheduler;
/** @entity: Scheduling entity backing this queue. */
struct drm_sched_entity entity;
/** @type: Type of jobs queued to this queue. */
enum drm_pvr_job_type type;
/** @ctx: Context object this queue is bound to. */
struct pvr_context *ctx;
/** @node: Used to add the queue to the active/idle queue list. */
struct list_head node;
/**
* @in_flight_job_count: Number of jobs submitted to the CCCB that
* have not been processed yet.
*/
atomic_t in_flight_job_count;
/**
* @cccb_fence_ctx: CCCB fence context.
*
* Used to control access to the CCCB when it is full, such that we don't
* end up trying to push commands to the CCCB if there's not enough
* space to receive all commands needed for a job to complete.
*/
struct pvr_queue_cccb_fence_ctx cccb_fence_ctx;
/** @job_fence_ctx: Job fence context object. */
struct pvr_queue_fence_ctx job_fence_ctx;
/** @timeline_ufo: Timeline UFO for the context queue. */
struct {
/** @fw_obj: FW object representing the UFO value. */
struct pvr_fw_object *fw_obj;
/** @value: CPU mapping of the UFO value. */
u32 *value;
} timeline_ufo;
/**
* @last_queued_job_scheduled_fence: The scheduled fence of the last
* job queued to this queue.
*
* We use it to insert frag -> geom dependencies when issuing combined
* geom+frag jobs, to guarantee that the fragment job that's part of
* the combined operation comes after all fragment jobs that were queued
* before it.
*/
struct dma_fence *last_queued_job_scheduled_fence;
/** @cccb: Client Circular Command Buffer. */
struct pvr_cccb cccb;
/** @reg_state_obj: FW object representing the register state of this queue. */
struct pvr_fw_object *reg_state_obj;
/** @ctx_offset: Offset of the queue context in the FW context object. */
u32 ctx_offset;
/** @callstack_addr: Initial call stack address for register state object. */
u64 callstack_addr;
};
bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f);
int pvr_queue_job_init(struct pvr_job *job);
void pvr_queue_job_cleanup(struct pvr_job *job);
void pvr_queue_job_push(struct pvr_job *job);
struct dma_fence *pvr_queue_job_arm(struct pvr_job *job);
struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
enum drm_pvr_job_type type,
struct drm_pvr_ioctl_create_context_args *args,
void *fw_ctx_map);
void pvr_queue_kill(struct pvr_queue *queue);
void pvr_queue_destroy(struct pvr_queue *queue);
void pvr_queue_process(struct pvr_queue *queue);
void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev);
void pvr_queue_device_post_reset(struct pvr_device *pvr_dev);
int pvr_queue_device_init(struct pvr_device *pvr_dev);
void pvr_queue_device_fini(struct pvr_device *pvr_dev);
#endif /* PVR_QUEUE_H */

drivers/gpu/drm/imagination/pvr_stream_defs.c

@ -43,6 +43,232 @@
* existing parameters, to preserve order. As parameters are naturally aligned, care must be taken
* with respect to implicit padding in the stream; padding should be minimised as much as possible.
*/
static const struct pvr_stream_def rogue_fwif_cmd_geom_stream[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_ctrl_stream_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu_border_colour_table, 64),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect0, 64,
PVR_FEATURE_VDM_DRAWINDIRECT),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_draw_indirect1, 32,
PVR_FEATURE_VDM_DRAWINDIRECT),
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.ppp_ctrl, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.te_psg, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.vdm_context_resume_task0_size, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.vdm_context_resume_task3_size, 32,
PVR_FEATURE_VDM_OBJECT_LEVEL_LLS),
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.view_idx, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_geom, regs.pds_coeff_free_prog, 32,
PVR_FEATURE_TESSELLATION),
};
static const struct pvr_stream_def rogue_fwif_cmd_geom_stream_brn49927[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_geom, regs.tpu, 32),
};
static const struct pvr_stream_ext_def cmd_geom_ext_streams_0[] = {
{
.stream = rogue_fwif_cmd_geom_stream_brn49927,
.stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream_brn49927),
.header_mask = PVR_STREAM_EXTHDR_GEOM0_BRN49927,
.quirk = 49927,
},
};
static const struct pvr_stream_ext_header cmd_geom_ext_headers[] = {
{
.ext_streams = cmd_geom_ext_streams_0,
.ext_streams_num = ARRAY_SIZE(cmd_geom_ext_streams_0),
.valid_mask = PVR_STREAM_EXTHDR_GEOM0_VALID,
},
};
const struct pvr_stream_cmd_defs pvr_cmd_geom_stream = {
.type = PVR_STREAM_TYPE_GEOM,
.main_stream = rogue_fwif_cmd_geom_stream,
.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_geom_stream),
.ext_nr_headers = ARRAY_SIZE(cmd_geom_ext_headers),
.ext_headers = cmd_geom_ext_headers,
.dest_size = sizeof(struct rogue_fwif_cmd_geom),
};
static const struct pvr_stream_def rogue_fwif_cmd_frag_stream[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_scissor_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_dbias_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zlsctl, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_zload_store_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_stencil_load_store_base, 64),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.fb_cdc_zls, 64,
PVR_FEATURE_REQUIRES_FB_CDC_ZLS_SETUP),
PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pbe_word),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu_border_colour_table, 64),
PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_bgnd),
PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.pds_pr_bgnd),
PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_frag, regs.usc_clear_register),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.usc_pixel_output_ctrl, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjdepth, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_bgobjvals, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_aa, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_xtp_pipe_enable, 32,
PVR_FEATURE_S7_TOP_INFRASTRUCTURE),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_ctl, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_info, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.pixel_phantom, 32,
PVR_FEATURE_CLUSTER_GROUPING),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.view_idx, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.event_pixel_pds_data, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32,
PVR_FEATURE_GPU_MULTICORE_SUPPORT),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.isp_zls_pixels, 32,
PVR_FEATURE_ZLS_SUBTILE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, regs.rgx_cr_blackpearl_fix, 32,
PVR_FEATURE_ISP_ZLS_D24_S8_PACKING_OGL_MODE),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, zls_stride, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_frag, sls_stride, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_frag, execute_count, 32,
PVR_FEATURE_GPU_MULTICORE_SUPPORT),
};
static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn47217[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.isp_oclqry_stride, 32),
};
static const struct pvr_stream_def rogue_fwif_cmd_frag_stream_brn49927[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_frag, regs.tpu, 32),
};
static const struct pvr_stream_ext_def cmd_frag_ext_streams_0[] = {
{
.stream = rogue_fwif_cmd_frag_stream_brn47217,
.stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn47217),
.header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN47217,
.quirk = 47217,
},
{
.stream = rogue_fwif_cmd_frag_stream_brn49927,
.stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream_brn49927),
.header_mask = PVR_STREAM_EXTHDR_FRAG0_BRN49927,
.quirk = 49927,
},
};
static const struct pvr_stream_ext_header cmd_frag_ext_headers[] = {
{
.ext_streams = cmd_frag_ext_streams_0,
.ext_streams_num = ARRAY_SIZE(cmd_frag_ext_streams_0),
.valid_mask = PVR_STREAM_EXTHDR_FRAG0_VALID,
},
};
const struct pvr_stream_cmd_defs pvr_cmd_frag_stream = {
.type = PVR_STREAM_TYPE_FRAG,
.main_stream = rogue_fwif_cmd_frag_stream,
.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_frag_stream),
.ext_nr_headers = ARRAY_SIZE(cmd_frag_ext_headers),
.ext_headers = cmd_frag_ext_headers,
.dest_size = sizeof(struct rogue_fwif_cmd_frag),
};
static const struct pvr_stream_def rogue_fwif_cmd_compute_stream[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu_border_colour_table, 64),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_queue, 64,
PVR_FEATURE_CDM_USER_MODE_QUEUE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb_base, 64,
PVR_FEATURE_CDM_USER_MODE_QUEUE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_cb, 64,
PVR_FEATURE_CDM_USER_MODE_QUEUE),
PVR_STREAM_DEF_NOT_FEATURE(rogue_fwif_cmd_compute, regs.cdm_ctrl_stream_base, 64,
PVR_FEATURE_CDM_USER_MODE_QUEUE),
PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_context_state_base_addr, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.cdm_resume_pds1, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.cdm_item, 32,
PVR_FEATURE_COMPUTE_MORTON_CAPABLE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.compute_cluster, 32,
PVR_FEATURE_CLUSTER_GROUPING),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, regs.tpu_tag_cdm_ctrl, 32,
PVR_FEATURE_TPU_DM_GLOBAL_REGISTERS),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, stream_start_offset, 32,
PVR_FEATURE_CDM_USER_MODE_QUEUE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_compute, execute_count, 32,
PVR_FEATURE_GPU_MULTICORE_SUPPORT),
};
static const struct pvr_stream_def rogue_fwif_cmd_compute_stream_brn49927[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_compute, regs.tpu, 32),
};
static const struct pvr_stream_ext_def cmd_compute_ext_streams_0[] = {
{
.stream = rogue_fwif_cmd_compute_stream_brn49927,
.stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream_brn49927),
.header_mask = PVR_STREAM_EXTHDR_COMPUTE0_BRN49927,
.quirk = 49927,
},
};
static const struct pvr_stream_ext_header cmd_compute_ext_headers[] = {
{
.ext_streams = cmd_compute_ext_streams_0,
.ext_streams_num = ARRAY_SIZE(cmd_compute_ext_streams_0),
.valid_mask = PVR_STREAM_EXTHDR_COMPUTE0_VALID,
},
};
const struct pvr_stream_cmd_defs pvr_cmd_compute_stream = {
.type = PVR_STREAM_TYPE_COMPUTE,
.main_stream = rogue_fwif_cmd_compute_stream,
.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_compute_stream),
.ext_nr_headers = ARRAY_SIZE(cmd_compute_ext_headers),
.ext_headers = cmd_compute_ext_headers,
.dest_size = sizeof(struct rogue_fwif_cmd_compute),
};
static const struct pvr_stream_def rogue_fwif_cmd_transfer_stream[] = {
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd0_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd1_base, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.pds_bgnd3_sizeinfo, 64),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_base, 64),
PVR_STREAM_DEF_ARRAY(rogue_fwif_cmd_transfer, regs.pbe_wordx_mrty),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_bgobjvals, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_pixel_output_ctrl, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register0, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register1, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register2, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.usc_clear_register3, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_mtile_size, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render_origin, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_ctl, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_aa, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_info, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_code, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.event_pixel_pds_data, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_render, 32),
PVR_STREAM_DEF(rogue_fwif_cmd_transfer, regs.isp_rgn, 32),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.isp_xtp_pipe_enable, 32,
PVR_FEATURE_S7_TOP_INFRASTRUCTURE),
PVR_STREAM_DEF_FEATURE(rogue_fwif_cmd_transfer, regs.frag_screen, 32,
PVR_FEATURE_GPU_MULTICORE_SUPPORT),
};
const struct pvr_stream_cmd_defs pvr_cmd_transfer_stream = {
.type = PVR_STREAM_TYPE_TRANSFER,
.main_stream = rogue_fwif_cmd_transfer_stream,
.main_stream_len = ARRAY_SIZE(rogue_fwif_cmd_transfer_stream),
.ext_nr_headers = 0,
.dest_size = sizeof(struct rogue_fwif_cmd_transfer),
};
static const struct pvr_stream_def rogue_fwif_static_render_context_state_stream[] = {
PVR_STREAM_DEF(rogue_fwif_geom_registers_caswitch,
geom_reg_vdm_context_state_base_addr, 64),

drivers/gpu/drm/imagination/pvr_sync.c

@ -0,0 +1,289 @@
// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#include <uapi/drm/pvr_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/gpu_scheduler.h>
#include <linux/xarray.h>
#include <linux/dma-fence-unwrap.h>
#include "pvr_device.h"
#include "pvr_queue.h"
#include "pvr_sync.h"
static int
pvr_check_sync_op(const struct drm_pvr_sync_op *sync_op)
{
u8 handle_type;
if (sync_op->flags & ~DRM_PVR_SYNC_OP_FLAGS_MASK)
return -EINVAL;
handle_type = sync_op->flags & DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK;
if (handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
handle_type != DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ)
return -EINVAL;
if (handle_type == DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ &&
sync_op->value != 0)
return -EINVAL;
return 0;
}
static void
pvr_sync_signal_free(struct pvr_sync_signal *sig_sync)
{
if (!sig_sync)
return;
drm_syncobj_put(sig_sync->syncobj);
dma_fence_chain_free(sig_sync->chain);
dma_fence_put(sig_sync->fence);
kfree(sig_sync);
}
void
pvr_sync_signal_array_cleanup(struct xarray *array)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync)
pvr_sync_signal_free(sig_sync);
xa_destroy(array);
}
static struct pvr_sync_signal *
pvr_sync_signal_array_add(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
struct dma_fence *cur_fence;
int err;
u32 id;
sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL);
if (!sig_sync)
return ERR_PTR(-ENOMEM);
sig_sync->handle = handle;
sig_sync->point = point;
if (point > 0) {
sig_sync->chain = dma_fence_chain_alloc();
if (!sig_sync->chain) {
err = -ENOMEM;
goto err_free_sig_sync;
}
}
sig_sync->syncobj = drm_syncobj_find(file, handle);
if (!sig_sync->syncobj) {
err = -EINVAL;
goto err_free_sig_sync;
}
/* Retrieve the current fence attached to that point. It's
* perfectly fine to get a NULL fence here, it just means there's
* no fence attached to that point yet.
*/
if (!drm_syncobj_find_fence(file, handle, point, 0, &cur_fence))
sig_sync->fence = cur_fence;
err = xa_alloc(array, &id, sig_sync, xa_limit_32b, GFP_KERNEL);
if (err)
goto err_free_sig_sync;
return sig_sync;
err_free_sig_sync:
pvr_sync_signal_free(sig_sync);
return ERR_PTR(err);
}
static struct pvr_sync_signal *
pvr_sync_signal_array_search(struct xarray *array, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync) {
if (handle == sig_sync->handle && point == sig_sync->point)
return sig_sync;
}
return NULL;
}
static struct pvr_sync_signal *
pvr_sync_signal_array_get(struct xarray *array, struct drm_file *file, u32 handle, u64 point)
{
struct pvr_sync_signal *sig_sync;
sig_sync = pvr_sync_signal_array_search(array, handle, point);
if (sig_sync)
return sig_sync;
return pvr_sync_signal_array_add(array, file, handle, point);
}
int
pvr_sync_signal_array_collect_ops(struct xarray *array,
struct drm_file *file,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops)
{
for (u32 i = 0; i < sync_op_count; i++) {
struct pvr_sync_signal *sig_sync;
int ret;
if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
continue;
ret = pvr_check_sync_op(&sync_ops[i]);
if (ret)
return ret;
sig_sync = pvr_sync_signal_array_get(array, file,
sync_ops[i].handle,
sync_ops[i].value);
if (IS_ERR(sig_sync))
return PTR_ERR(sig_sync);
}
return 0;
}
int
pvr_sync_signal_array_update_fences(struct xarray *array,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct dma_fence *done_fence)
{
for (u32 i = 0; i < sync_op_count; i++) {
struct dma_fence *old_fence;
struct pvr_sync_signal *sig_sync;
if (!(sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL))
continue;
sig_sync = pvr_sync_signal_array_search(array, sync_ops[i].handle,
sync_ops[i].value);
if (WARN_ON(!sig_sync))
return -EINVAL;
old_fence = sig_sync->fence;
sig_sync->fence = dma_fence_get(done_fence);
dma_fence_put(old_fence);
if (WARN_ON(!sig_sync->fence))
return -EINVAL;
}
return 0;
}
void
pvr_sync_signal_array_push_fences(struct xarray *array)
{
struct pvr_sync_signal *sig_sync;
unsigned long i;
xa_for_each(array, i, sig_sync) {
if (sig_sync->chain) {
drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain,
sig_sync->fence, sig_sync->point);
sig_sync->chain = NULL;
} else {
drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence);
}
}
}
static int
pvr_sync_add_dep_to_job(struct drm_sched_job *job, struct dma_fence *f)
{
struct dma_fence_unwrap iter;
u32 native_fence_count = 0;
struct dma_fence *uf;
int err = 0;
dma_fence_unwrap_for_each(uf, &iter, f) {
if (pvr_queue_fence_is_ufo_backed(uf))
native_fence_count++;
}
/* No need to unwrap the fence if it's fully non-native. */
if (!native_fence_count)
return drm_sched_job_add_dependency(job, f);
dma_fence_unwrap_for_each(uf, &iter, f) {
/* There's no dma_fence_unwrap_stop() helper cleaning up the refs
* owned by dma_fence_unwrap(), so let's just iterate over all
* entries without doing anything when something failed.
*/
if (err)
continue;
if (pvr_queue_fence_is_ufo_backed(uf)) {
struct drm_sched_fence *s_fence = to_drm_sched_fence(uf);
/* If this is a native dependency, we wait for the scheduled fence,
* and we will let pvr_queue_run_job() issue FW waits.
*/
err = drm_sched_job_add_dependency(job,
dma_fence_get(&s_fence->scheduled));
} else {
err = drm_sched_job_add_dependency(job, dma_fence_get(uf));
}
}
dma_fence_put(f);
return err;
}
int
pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct xarray *signal_array)
{
int err = 0;
if (!sync_op_count)
return 0;
for (u32 i = 0; i < sync_op_count; i++) {
struct pvr_sync_signal *sig_sync;
struct dma_fence *fence;
if (sync_ops[i].flags & DRM_PVR_SYNC_OP_FLAG_SIGNAL)
continue;
err = pvr_check_sync_op(&sync_ops[i]);
if (err)
return err;
sig_sync = pvr_sync_signal_array_search(signal_array, sync_ops[i].handle,
sync_ops[i].value);
if (sig_sync) {
if (WARN_ON(!sig_sync->fence))
return -EINVAL;
fence = dma_fence_get(sig_sync->fence);
} else {
err = drm_syncobj_find_fence(from_pvr_file(pvr_file), sync_ops[i].handle,
sync_ops[i].value, 0, &fence);
if (err)
return err;
}
err = pvr_sync_add_dep_to_job(job, fence);
if (err)
return err;
}
return 0;
}

drivers/gpu/drm/imagination/pvr_sync.h

@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
/* Copyright (c) 2023 Imagination Technologies Ltd. */
#ifndef PVR_SYNC_H
#define PVR_SYNC_H
#include <uapi/drm/pvr_drm.h>
/* Forward declaration from <linux/xarray.h>. */
struct xarray;
/* Forward declaration from <drm/drm_file.h>. */
struct drm_file;
/* Forward declaration from <drm/gpu_scheduler.h>. */
struct drm_sched_job;
/* Forward declaration from "pvr_device.h". */
struct pvr_file;
/**
* struct pvr_sync_signal - Object encoding a syncobj signal operation
*
* The job submission logic collects all signal operations in an array of
* pvr_sync_signal objects. This array also serves as a cache to get the
* latest dma_fence when multiple jobs are submitted at once, and one job
* signals a syncobj point that's later waited on by a subsequent job.
*/
struct pvr_sync_signal {
/** @handle: Handle of the syncobj to signal. */
u32 handle;
/**
* @point: Point to signal in the syncobj.
*
* Only relevant for timeline syncobjs.
*/
u64 point;
/** @syncobj: Syncobj retrieved from the handle. */
struct drm_syncobj *syncobj;
/**
* @chain: Chain object used to link the new fence with the
* existing timeline syncobj.
*
* Should be zero when manipulating a regular syncobj.
*/
struct dma_fence_chain *chain;
/**
* @fence: New fence object to attach to the syncobj.
*
* This pointer starts with the current fence bound to
* the <handle,point> pair.
*/
struct dma_fence *fence;
};
void
pvr_sync_signal_array_cleanup(struct xarray *array);
int
pvr_sync_signal_array_collect_ops(struct xarray *array,
struct drm_file *file,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops);
int
pvr_sync_signal_array_update_fences(struct xarray *array,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct dma_fence *done_fence);
void
pvr_sync_signal_array_push_fences(struct xarray *array);
int
pvr_sync_add_deps_to_job(struct pvr_file *pvr_file, struct drm_sched_job *job,
u32 sync_op_count,
const struct drm_pvr_sync_op *sync_ops,
struct xarray *signal_array);
#endif /* PVR_SYNC_H */