From da573eaa3a13f60efafcbe25e4f4465cf1a1b40b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:05 -0600 Subject: [PATCH] ntb_perf: Improve thread handling to increase robustness This commit accomplishes a few things: 1) Properly prevent multiple sets of threads from running at once using a mutex. Lots of race issues existed with the thread_cleanup. 2) The mutex allows us to ensure that threads are finished before tearing down the device or module. 3) Don't use kthread_stop when the threads can exit by themselves, as this is counter-indicated by the kthread_create documentation. Threads now wait for kthread_stop to occur. 4) Writing to the run file now blocks until the threads are complete. The test can then be safely interrupted by a SIGINT. Also, while I was at it: 5) debugfs_run_write shouldn't return 0 in the early check cases as this could cause debugfs_run_write to loop undesirably. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 134 ++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 5008ccf600a9..db4dc61164ca 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include #include #include +#include #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -121,6 +122,7 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; }; struct perf_ctx { @@ -134,9 +136,11 @@ struct perf_ctx { struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { @@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); @@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -406,6 +419,16 @@ err: pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return rc; } @@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, struct perf_ctx *perf = filp->private_data; char *buf; ssize_t ret, out_offset; + int running; if (!perf) return 0; @@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, buf = kmalloc(64, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); + + running = mutex_is_locked(&perf->run_mutex); + out_offset = snprintf(buf, 64, "%d\n", running); ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); kfree(buf); @@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { @@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); if (!perf->link_is_up) - return 0; + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; - - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; - - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } - - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } - - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } - - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } + + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix run_order to %u\n", run_order); + } + + node = dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); + + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } + } + + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); @@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); cancel_work_sync(&perf->link_cleanup);