mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-29 23:24:11 +08:00
habanalabs: Separate CS job completion from its deallocation
Current CS jobs are no longer needed after their completion. However, jobs of future workloads might still be in use even after they are completed. To allow that, this patch adds a refcount to the job object and decouples its completion handling from its deallocation.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
0da5698bf4
commit
649c459212
@ -155,6 +155,18 @@ static void cs_put(struct hl_cs *cs)
|
||||
kref_put(&cs->refcount, cs_do_release);
|
||||
}
|
||||
|
||||
static void cs_job_do_release(struct kref *ref)
|
||||
{
|
||||
struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
|
||||
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
static void cs_job_put(struct hl_cs_job *job)
|
||||
{
|
||||
kref_put(&job->refcount, cs_job_do_release);
|
||||
}
|
||||
|
||||
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
|
||||
{
|
||||
/*
|
||||
@ -227,7 +239,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
|
||||
static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
|
||||
{
|
||||
struct hl_cs *cs = job->cs;
|
||||
|
||||
@ -276,7 +288,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
|
||||
job->queue_type == QUEUE_TYPE_HW)
|
||||
cs_put(cs);
|
||||
|
||||
kfree(job);
|
||||
cs_job_put(job);
|
||||
}
|
||||
|
||||
static void cs_do_release(struct kref *ref)
|
||||
@ -290,13 +302,13 @@ static void cs_do_release(struct kref *ref)
|
||||
/*
|
||||
* Although if we reached here it means that all external jobs have
|
||||
* finished, because each one of them took refcnt to CS, we still
|
||||
* need to go over the internal jobs and free them. Otherwise, we
|
||||
* need to go over the internal jobs and complete them. Otherwise, we
|
||||
* will have leaked memory and what's worse, the CS object (and
|
||||
* potentially the CTX object) could be released, while the JOB
|
||||
* still holds a pointer to them (but no reference).
|
||||
*/
|
||||
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
|
||||
free_job(hdev, job);
|
||||
complete_job(hdev, job);
|
||||
|
||||
if (!cs->submitted) {
|
||||
/* In case the wait for signal CS was submitted, the put occurs
|
||||
@ -507,7 +519,7 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
|
||||
struct hl_cs_job *job, *tmp;
|
||||
|
||||
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
|
||||
free_job(hdev, job);
|
||||
complete_job(hdev, job);
|
||||
}
|
||||
|
||||
void hl_cs_rollback_all(struct hl_device *hdev)
|
||||
@ -539,7 +551,7 @@ static void job_wq_completion(struct work_struct *work)
|
||||
struct hl_device *hdev = cs->ctx->hdev;
|
||||
|
||||
/* job is no longer needed */
|
||||
free_job(hdev, job);
|
||||
complete_job(hdev, job);
|
||||
}
|
||||
|
||||
static int validate_queue_index(struct hl_device *hdev,
|
||||
@ -647,6 +659,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
|
||||
if (!job)
|
||||
return NULL;
|
||||
|
||||
kref_init(&job->refcount);
|
||||
job->queue_type = queue_type;
|
||||
job->is_kernel_allocated_cb = is_kernel_allocated_cb;
|
||||
|
||||
|
@ -168,18 +168,19 @@ static int command_submission_jobs_show(struct seq_file *s, void *data)
|
||||
if (first) {
|
||||
first = false;
|
||||
seq_puts(s, "\n");
|
||||
seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n");
|
||||
seq_puts(s, "---------------------------------------\n");
|
||||
seq_puts(s, " JOB ID CS ID CTX ASID JOB RefCnt H/W Queue\n");
|
||||
seq_puts(s, "----------------------------------------------------\n");
|
||||
}
|
||||
if (job->cs)
|
||||
seq_printf(s,
|
||||
" %02d %llu %d %d\n",
|
||||
" %02d %llu %d %d %d\n",
|
||||
job->id, job->cs->sequence, job->cs->ctx->asid,
|
||||
job->hw_queue_id);
|
||||
kref_read(&job->refcount), job->hw_queue_id);
|
||||
else
|
||||
seq_printf(s,
|
||||
" %02d 0 %d %d\n",
|
||||
job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
|
||||
" %02d 0 %d %d %d\n",
|
||||
job->id, HL_KERNEL_ASID_ID,
|
||||
kref_read(&job->refcount), job->hw_queue_id);
|
||||
}
|
||||
|
||||
spin_unlock(&dev_entry->cs_job_spinlock);
|
||||
|
@ -1142,6 +1142,7 @@ struct hl_cs {
|
||||
* @userptr_list: linked-list of userptr mappings that belong to this job and
|
||||
* wait for completion.
|
||||
* @debugfs_list: node in debugfs list of command submission jobs.
|
||||
* @refcount: reference counter for usage of the CS job.
|
||||
* @queue_type: the type of the H/W queue this job is submitted to.
|
||||
* @id: the id of this job inside a CS.
|
||||
* @hw_queue_id: the id of the H/W queue this job is submitted to.
|
||||
@ -1165,6 +1166,7 @@ struct hl_cs_job {
|
||||
struct work_struct finish_work;
|
||||
struct list_head userptr_list;
|
||||
struct list_head debugfs_list;
|
||||
struct kref refcount;
|
||||
enum hl_queue_type queue_type;
|
||||
u32 id;
|
||||
u32 hw_queue_id;
|
||||
|
Loading…
Reference in New Issue
Block a user