From 6500d2045d5247cfb2ac31cc1691d7191096389b Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:00 +0000 Subject: [PATCH 01/11] scsi: scsi_debug: Fix check for sdev queue full There is a report that the blktests scsi/004 test for "TASK SET FULL" (TSF) now fails. The condition upon we should issue this TSF is when the sdev queue is full. The check for a full queue has an off-by-1 error. Previously we would increment the number of requests in the queue after testing if the queue would be full, i.e. test if one less than full. Since we now use scsi_device_busy() to count the number of requests in the queue, this would already account for the current request, so fix the test for queue full accordingly. Fixes: 151f0ec9ddb5 ("scsi: scsi_debug: Drop sdebug_dev_info.num_in_q") Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202303201334.18b30edc-oliver.sang@intel.com Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-2-john.g.garry@oracle.com Acked-by: Douglas Gilbert Tested-by: Yi Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 7ed117e78bd4..782515abca2c 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5580,7 +5580,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, int num_in_q = scsi_device_busy(sdp); int qdepth = cmnd->device->queue_depth; - if ((num_in_q == (qdepth - 1)) && + if ((num_in_q == qdepth) && (atomic_inc_return(&sdebug_a_tsf) >= abs(sdebug_every_nth))) { atomic_set(&sdebug_a_tsf, 0); From 00f9d622e8b237d8403569ee51f7bfb9bf89a2d5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:01 +0000 Subject: [PATCH 02/11] scsi: scsi_debug: Don't iter all shosts in clear_luns_changed_on_target() In clear_luns_changed_on_target(), we iter all devices for all shosts to conditionally clear the SDEBUG_UA_LUNS_CHANGED flag in the per-device uas_bm. One condition to see whether we clear the flag is to test whether the host for the device under consideration is the same as the matching device's (devip) host. This check will only ever pass for devices for the same shost, so only iter the devices for the matching device shost. We can now drop the spinlock'ing of the sdebug_host_list_lock in the same function. This will allow us to use a mutex instead of the spinlock for the global shost lock, as clear_luns_changed_on_target() could be called in non-blocking context, in scsi_debug_queuecommand() -> make_ua() -> clear_luns_changed_on_target() (which is why required a spinlock). Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-3-john.g.garry@oracle.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 782515abca2c..eba6eca81e84 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1063,18 +1063,15 @@ static void all_config_cdb_len(void) static void clear_luns_changed_on_target(struct sdebug_dev_info *devip) { - struct sdebug_host_info *sdhp; + struct sdebug_host_info *sdhp = devip->sdbg_host; struct sdebug_dev_info *dp; - spin_lock(&sdebug_host_list_lock); - list_for_each_entry(sdhp, &sdebug_host_list, host_list) { - list_for_each_entry(dp, &sdhp->dev_info_list, dev_list) { - if ((devip->sdbg_host == dp->sdbg_host) && - (devip->target == dp->target)) - clear_bit(SDEBUG_UA_LUNS_CHANGED, dp->uas_bm); + list_for_each_entry(dp, &sdhp->dev_info_list, dev_list) { + if ((devip->sdbg_host == dp->sdbg_host) && + (devip->target == dp->target)) { + clear_bit(SDEBUG_UA_LUNS_CHANGED, dp->uas_bm); } } - spin_unlock(&sdebug_host_list_lock); } static int make_ua(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) From 0aaa3fad4fd931ba3accac3a1fcc7334ca780591 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:02 +0000 Subject: [PATCH 03/11] scsi: scsi_debug: Change shost list lock to a mutex The shost list lock, sdebug_host_list_lock, is a spinlock. We would only lock in non-atomic context in this driver, so use a mutex instead, which is friendlier if we need to schedule when iterating. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-4-john.g.garry@oracle.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index eba6eca81e84..a61e7c31dab5 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -816,7 +816,7 @@ static int sdebug_cylinders_per; /* cylinders per surface */ static int sdebug_sectors_per; /* sectors per cylinder */ static LIST_HEAD(sdebug_host_list); -static DEFINE_SPINLOCK(sdebug_host_list_lock); +static DEFINE_MUTEX(sdebug_host_list_mutex); static struct xarray per_store_arr; static struct xarray *per_store_ap = &per_store_arr; @@ -908,7 +908,7 @@ static void sdebug_max_tgts_luns(void) struct sdebug_host_info *sdbg_host; struct Scsi_Host *hpnt; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdbg_host, &sdebug_host_list, host_list) { hpnt = sdbg_host->shost; if ((hpnt->this_id >= 0) && @@ -919,7 +919,7 @@ static void sdebug_max_tgts_luns(void) /* sdebug_max_luns; */ hpnt->max_lun = SCSI_W_LUN_REPORT_LUNS + 1; } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); } enum sdeb_cmd_data {SDEB_IN_DATA = 0, SDEB_IN_CDB = 1}; @@ -1051,14 +1051,14 @@ static void all_config_cdb_len(void) struct Scsi_Host *shost; struct scsi_device *sdev; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdbg_host, &sdebug_host_list, host_list) { shost = sdbg_host->shost; shost_for_each_device(sdev, shost) { config_cdb_len(sdev); } } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); } static void clear_luns_changed_on_target(struct sdebug_dev_info *devip) @@ -5423,7 +5423,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) ++num_host_resets; if (SDEBUG_OPT_ALL_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, "%s\n", __func__); - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdbg_host, &sdebug_host_list, host_list) { list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) { @@ -5431,7 +5431,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) ++k; } } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); stop_all_queued(); if (SDEBUG_OPT_RESET_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, @@ -6337,13 +6337,13 @@ static ssize_t lun_format_store(struct device_driver *ddp, const char *buf, struct sdebug_host_info *sdhp; struct sdebug_dev_info *dp; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdhp, &sdebug_host_list, host_list) { list_for_each_entry(dp, &sdhp->dev_info_list, dev_list) { set_bit(SDEBUG_UA_LUNS_CHANGED, dp->uas_bm); } } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); } return count; } @@ -6373,7 +6373,7 @@ static ssize_t max_luns_store(struct device_driver *ddp, const char *buf, struct sdebug_host_info *sdhp; struct sdebug_dev_info *dp; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdhp, &sdebug_host_list, host_list) { list_for_each_entry(dp, &sdhp->dev_info_list, @@ -6382,7 +6382,7 @@ static ssize_t max_luns_store(struct device_driver *ddp, const char *buf, dp->uas_bm); } } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); } return count; } @@ -6489,7 +6489,7 @@ static ssize_t virtual_gb_store(struct device_driver *ddp, const char *buf, struct sdebug_host_info *sdhp; struct sdebug_dev_info *dp; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_for_each_entry(sdhp, &sdebug_host_list, host_list) { list_for_each_entry(dp, &sdhp->dev_info_list, @@ -6498,7 +6498,7 @@ static ssize_t virtual_gb_store(struct device_driver *ddp, const char *buf, dp->uas_bm); } } - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); } return count; } @@ -7258,9 +7258,9 @@ static int sdebug_add_host_helper(int per_host_idx) goto clean; } - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_add_tail(&sdbg_host->host_list, &sdebug_host_list); - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); sdbg_host->dev.bus = &pseudo_lld_bus; sdbg_host->dev.parent = pseudo_primary; @@ -7269,9 +7269,9 @@ static int sdebug_add_host_helper(int per_host_idx) error = device_register(&sdbg_host->dev); if (error) { - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); list_del(&sdbg_host->host_list); - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); goto clean; } @@ -7311,7 +7311,7 @@ static void sdebug_do_remove_host(bool the_end) struct sdebug_host_info *sdbg_host = NULL; struct sdebug_host_info *sdbg_host2; - spin_lock(&sdebug_host_list_lock); + mutex_lock(&sdebug_host_list_mutex); if (!list_empty(&sdebug_host_list)) { sdbg_host = list_entry(sdebug_host_list.prev, struct sdebug_host_info, host_list); @@ -7336,7 +7336,7 @@ static void sdebug_do_remove_host(bool the_end) } if (sdbg_host) list_del(&sdbg_host->host_list); - spin_unlock(&sdebug_host_list_lock); + mutex_unlock(&sdebug_host_list_mutex); if (!sdbg_host) return; From 25b80b2c7582ea15ba90b8007f1e1f1b8fc762b9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:03 +0000 Subject: [PATCH 04/11] scsi: scsi_debug: Protect block_unblock_all_queues() with mutex There is no reason that calls to block_unblock_all_queues() from different context can't race with one another, so protect with the sdebug_host_list_mutex. There's no need for a more fine-grained per shost locking here (and we don't have a per-host lock anyway). Also simplify some touched code in sdebug_change_qdepth(). Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-5-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a61e7c31dab5..cd05e2f87417 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5497,6 +5497,8 @@ static void block_unblock_all_queues(bool block) int j; struct sdebug_queue *sqp; + lockdep_assert_held(&sdebug_host_list_mutex); + for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) atomic_set(&sqp->blocked, (int)block); } @@ -5511,10 +5513,13 @@ static void tweak_cmnd_count(void) modulo = abs(sdebug_every_nth); if (modulo < 2) return; + + mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); count = atomic_read(&sdebug_cmnd_count); atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo); block_unblock_all_queues(false); + mutex_unlock(&sdebug_host_list_mutex); } static void clear_queue_stats(void) @@ -6036,6 +6041,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; + mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { @@ -6051,6 +6057,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, sdebug_ndelay = 0; } block_unblock_all_queues(false); + mutex_unlock(&sdebug_host_list_mutex); } return res; } @@ -6076,6 +6083,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; + mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { @@ -6092,6 +6100,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, : DEF_JDELAY; } block_unblock_all_queues(false); + mutex_unlock(&sdebug_host_list_mutex); } return res; } @@ -6405,6 +6414,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) && (n <= SDEBUG_CANQUEUE) && (sdebug_host_max_queue == 0)) { + mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); k = 0; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; @@ -6421,6 +6431,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, else atomic_set(&retired_max_queue, 0); block_unblock_all_queues(false); + mutex_unlock(&sdebug_host_list_mutex); return count; } return -EINVAL; @@ -7352,7 +7363,9 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) if (!devip) return -ENODEV; + mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); + if (qdepth > SDEBUG_CANQUEUE) { qdepth = SDEBUG_CANQUEUE; pr_warn("%s: requested qdepth [%d] exceeds canqueue [%d], trim\n", __func__, @@ -7363,9 +7376,12 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) if (qdepth != sdev->queue_depth) scsi_change_queue_depth(sdev, qdepth); + block_unblock_all_queues(false); + mutex_unlock(&sdebug_host_list_mutex); + if (SDEBUG_OPT_Q_NOISE & sdebug_opts) sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d\n", __func__, qdepth); - block_unblock_all_queues(false); + return sdev->queue_depth; } From a0473bf31df5bf2da7ecb50023c129659ce0a835 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:04 +0000 Subject: [PATCH 05/11] scsi: scsi_debug: Use scsi_block_requests() to block queues The feature to block queues is quite dubious, since it races with in-flight IO. Indeed, it seems unnecessary for block queues for any times we do so. Anyway, to keep the same behaviour, use standard SCSI API to stop IO being sent - scsi_{un}block_requests(). Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-6-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cd05e2f87417..f53f3e78aaa1 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -359,7 +359,6 @@ struct sdebug_queue { struct sdebug_queued_cmd qc_arr[SDEBUG_CANQUEUE]; unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS]; spinlock_t qc_lock; - atomic_t blocked; /* to temporarily stop more being queued */ }; static atomic_t sdebug_cmnd_count; /* number of incoming commands */ @@ -5494,13 +5493,18 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size) static void block_unblock_all_queues(bool block) { - int j; - struct sdebug_queue *sqp; + struct sdebug_host_info *sdhp; lockdep_assert_held(&sdebug_host_list_mutex); - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) - atomic_set(&sqp->blocked, (int)block); + list_for_each_entry(sdhp, &sdebug_host_list, host_list) { + struct Scsi_Host *shost = sdhp->shost; + + if (block) + scsi_block_requests(shost); + else + scsi_unblock_requests(shost); + } } /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1 @@ -5572,10 +5576,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, sqp = get_queue(cmnd); spin_lock_irqsave(&sqp->qc_lock, iflags); - if (unlikely(atomic_read(&sqp->blocked))) { - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - return SCSI_MLQUEUE_HOST_BUSY; - } if (unlikely(sdebug_every_nth && (SDEBUG_OPT_RARE_TSF & sdebug_opts) && (scsi_result == 0))) { From 1107c7b24ee3280abfc59f1b9186e285cabdd3ec Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:05 +0000 Subject: [PATCH 06/11] scsi: scsi_debug: Dynamically allocate sdebug_queued_cmd Eventually we will drop the sdebug_queue struct as it is not really required, so start with making the sdebug_queued_cmd dynamically allocated for the lifetime of the scsi_cmnd in the driver. As an interim measure, make sdebug_queued_cmd.sd_dp a pointer to struct sdebug_defer. Also keep a value of the index allocated in sdebug_queued_cmd.qc_arr in struct sdebug_queued_cmd. To deal with an races in accessing the scsi cmnd allocated struct sdebug_queued_cmd, add a spinlock for the scsi command in its priv area. Races may be between scheduling a command for completion, aborting a command, and the command actually completing and freeing the struct sdebug_queued_cmd. [mkp: typo fix] Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-7-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 449 ++++++++++++++++++++++---------------- 1 file changed, 264 insertions(+), 185 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index f53f3e78aaa1..1c85cbd92178 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -250,6 +250,11 @@ static const char *sdebug_version_date = "20210520"; #define SDEB_XA_NOT_IN_USE XA_MARK_1 +static struct kmem_cache *queued_cmd_cache; + +#define TO_QUEUED_CMD(scmd) ((void *)(scmd)->host_scribble) +#define ASSIGN_QUEUED_CMD(scmnd, qc) { (scmnd)->host_scribble = (void *) qc; } + /* Zone types (zbcr05 table 25) */ enum sdebug_z_type { ZBC_ZTYPE_CNV = 0x1, @@ -337,12 +342,8 @@ struct sdebug_defer { struct execute_work ew; ktime_t cmpl_ts;/* time since boot to complete this cmd */ int sqa_idx; /* index of sdebug_queue array */ - int qc_idx; /* index of sdebug_queued_cmd array within sqa_idx */ int hc_idx; /* hostwide tag index */ int issuing_cpu; - bool init_hrt; - bool init_wq; - bool init_poll; bool aborted; /* true when blk_abort_request() already called */ enum sdeb_defer_type defer_t; }; @@ -351,12 +352,16 @@ struct sdebug_queued_cmd { /* corresponding bit set in in_use_bm[] in owning struct sdebug_queue * instance indicates this slot is in use. */ - struct sdebug_defer *sd_dp; - struct scsi_cmnd *a_cmnd; + struct sdebug_defer sd_dp; + struct scsi_cmnd *scmd; +}; + +struct sdebug_scsi_cmd { + spinlock_t lock; }; struct sdebug_queue { - struct sdebug_queued_cmd qc_arr[SDEBUG_CANQUEUE]; + struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE]; unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS]; spinlock_t qc_lock; }; @@ -508,6 +513,8 @@ static int sdebug_add_store(void); static void sdebug_erase_store(int idx, struct sdeb_store_info *sip); static void sdebug_erase_all_stores(bool apart_from_first); +static void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp); + /* * The following are overflow arrays for cdbs that "hit" the same index in * the opcode_info_arr array. The most time sensitive (or commonly used) cdb @@ -4919,46 +4926,48 @@ static u32 get_tag(struct scsi_cmnd *cmnd) /* Queued (deferred) command completions converge here. */ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) { - bool aborted = sd_dp->aborted; + struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp); int qc_idx; int retiring = 0; - unsigned long iflags; + unsigned long flags, iflags; + struct scsi_cmnd *scp = sqcp->scmd; + struct sdebug_scsi_cmd *sdsc; + bool aborted; struct sdebug_queue *sqp; - struct sdebug_queued_cmd *sqcp; - struct scsi_cmnd *scp; - if (unlikely(aborted)) - sd_dp->aborted = false; - qc_idx = sd_dp->qc_idx; - sqp = sdebug_q_arr + sd_dp->sqa_idx; + qc_idx = sd_dp->sqa_idx; if (sdebug_statistics) { atomic_inc(&sdebug_completions); if (raw_smp_processor_id() != sd_dp->issuing_cpu) atomic_inc(&sdebug_miss_cpus); } + if (!scp) { + pr_err("scmd=NULL\n"); + goto out; + } if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) { pr_err("wild qc_idx=%d\n", qc_idx); - return; + goto out; } + + sdsc = scsi_cmd_priv(scp); + sqp = get_queue(scp); spin_lock_irqsave(&sqp->qc_lock, iflags); - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE); - sqcp = &sqp->qc_arr[qc_idx]; - scp = sqcp->a_cmnd; - if (unlikely(scp == NULL)) { - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - pr_err("scp is NULL, sqa_idx=%d, qc_idx=%d, hc_idx=%d\n", - sd_dp->sqa_idx, qc_idx, sd_dp->hc_idx); - return; - } + spin_lock_irqsave(&sdsc->lock, flags); + aborted = sd_dp->aborted; + if (unlikely(aborted)) + sd_dp->aborted = false; + ASSIGN_QUEUED_CMD(scp, NULL); if (unlikely(atomic_read(&retired_max_queue) > 0)) retiring = 1; - sqcp->a_cmnd = NULL; + sqp->qc_arr[qc_idx] = NULL; if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { + spin_unlock_irqrestore(&sdsc->lock, flags); spin_unlock_irqrestore(&sqp->qc_lock, iflags); - pr_err("Unexpected completion\n"); - return; + pr_err("Unexpected completion qc_idx=%d\n", qc_idx); + goto out; } if (unlikely(retiring)) { /* user has reduced max_queue */ @@ -4966,9 +4975,10 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) retval = atomic_read(&retired_max_queue); if (qc_idx >= retval) { + spin_unlock_irqrestore(&sdsc->lock, flags); spin_unlock_irqrestore(&sqp->qc_lock, iflags); pr_err("index %d too large\n", retval); - return; + goto out; } k = find_last_bit(sqp->in_use_bm, retval); if ((k < sdebug_max_queue) || (k == retval)) @@ -4976,14 +4986,19 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) else atomic_set(&retired_max_queue, k + 1); } + + spin_unlock_irqrestore(&sdsc->lock, flags); spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (unlikely(aborted)) { - if (sdebug_verbose) - pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n"); + + if (aborted) { + pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n"); blk_abort_request(scsi_cmd_to_rq(scp)); - return; + goto out; } + scsi_done(scp); /* callback to mid level */ +out: + sdebug_free_queued_cmd(sqcp); } /* When high resolution timer goes off this function is called. */ @@ -5233,115 +5248,126 @@ static void scsi_debug_slave_destroy(struct scsi_device *sdp) } } -static void stop_qc_helper(struct sdebug_defer *sd_dp, +/* Returns true if we require the queued memory to be freed by the caller. */ +static bool stop_qc_helper(struct sdebug_defer *sd_dp, enum sdeb_defer_type defer_t) { - if (!sd_dp) - return; - if (defer_t == SDEB_DEFER_HRT) - hrtimer_cancel(&sd_dp->hrt); - else if (defer_t == SDEB_DEFER_WQ) - cancel_work_sync(&sd_dp->ew.work); + if (defer_t == SDEB_DEFER_HRT) { + int res = hrtimer_try_to_cancel(&sd_dp->hrt); + + switch (res) { + case 0: /* Not active, it must have already run */ + case -1: /* -1 It's executing the CB */ + return false; + case 1: /* Was active, we've now cancelled */ + default: + return true; + } + } else if (defer_t == SDEB_DEFER_WQ) { + /* Cancel if pending */ + if (cancel_work_sync(&sd_dp->ew.work)) + return true; + /* Was not pending, so it must have run */ + return false; + } else if (defer_t == SDEB_DEFER_POLL) { + return true; + } + + return false; } -/* If @cmnd found deletes its timer or work queue and returns true; else - returns false */ -static bool stop_queued_cmnd(struct scsi_cmnd *cmnd) + +static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx) { - unsigned long iflags; - int j, k, qmax, r_qmax; enum sdeb_defer_type l_defer_t; - struct sdebug_queue *sqp; struct sdebug_queued_cmd *sqcp; struct sdebug_defer *sd_dp; + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { + lockdep_assert_held(&sdsc->lock); + + sqcp = TO_QUEUED_CMD(cmnd); + if (!sqcp) + return false; + sd_dp = &sqcp->sd_dp; + if (sqa_idx) + *sqa_idx = sd_dp->sqa_idx; + l_defer_t = READ_ONCE(sd_dp->defer_t); + ASSIGN_QUEUED_CMD(cmnd, NULL); + + if (stop_qc_helper(sd_dp, l_defer_t)) + sdebug_free_queued_cmd(sqcp); + + return true; +} + +/* + * Called from scsi_debug_abort() only, which is for timed-out cmd. + */ +static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd) +{ + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); + struct sdebug_queue *sqp = get_queue(cmnd); + unsigned long flags, iflags; + int k = -1; + bool res; + + spin_lock_irqsave(&sdsc->lock, flags); + res = scsi_debug_stop_cmnd(cmnd, &k); + spin_unlock_irqrestore(&sdsc->lock, flags); + + if (k >= 0) { spin_lock_irqsave(&sqp->qc_lock, iflags); - qmax = sdebug_max_queue; - r_qmax = atomic_read(&retired_max_queue); - if (r_qmax > qmax) - qmax = r_qmax; - for (k = 0; k < qmax; ++k) { - if (test_bit(k, sqp->in_use_bm)) { - sqcp = &sqp->qc_arr[k]; - if (cmnd != sqcp->a_cmnd) - continue; - /* found */ - sqcp->a_cmnd = NULL; - sd_dp = sqcp->sd_dp; - if (sd_dp) { - l_defer_t = READ_ONCE(sd_dp->defer_t); - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE); - } else - l_defer_t = SDEB_DEFER_NONE; - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - stop_qc_helper(sd_dp, l_defer_t); - clear_bit(k, sqp->in_use_bm); - return true; - } - } + clear_bit(k, sqp->in_use_bm); + sqp->qc_arr[k] = NULL; spin_unlock_irqrestore(&sqp->qc_lock, iflags); } - return false; + + return res; } /* Deletes (stops) timers or work queues of all queued commands */ static void stop_all_queued(void) { - unsigned long iflags; + unsigned long iflags, flags; int j, k; - enum sdeb_defer_type l_defer_t; struct sdebug_queue *sqp; - struct sdebug_queued_cmd *sqcp; - struct sdebug_defer *sd_dp; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { spin_lock_irqsave(&sqp->qc_lock, iflags); for (k = 0; k < SDEBUG_CANQUEUE; ++k) { if (test_bit(k, sqp->in_use_bm)) { - sqcp = &sqp->qc_arr[k]; - if (sqcp->a_cmnd == NULL) + struct sdebug_queued_cmd *sqcp = sqp->qc_arr[k]; + struct sdebug_scsi_cmd *sdsc; + struct scsi_cmnd *scmd; + + if (!sqcp) continue; - sqcp->a_cmnd = NULL; - sd_dp = sqcp->sd_dp; - if (sd_dp) { - l_defer_t = READ_ONCE(sd_dp->defer_t); - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE); - } else - l_defer_t = SDEB_DEFER_NONE; - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - stop_qc_helper(sd_dp, l_defer_t); + scmd = sqcp->scmd; + if (!scmd) + continue; + sdsc = scsi_cmd_priv(scmd); + spin_lock_irqsave(&sdsc->lock, flags); + if (TO_QUEUED_CMD(scmd) != sqcp) { + spin_unlock_irqrestore(&sdsc->lock, flags); + continue; + } + scsi_debug_stop_cmnd(scmd, NULL); + spin_unlock_irqrestore(&sdsc->lock, flags); + sqp->qc_arr[k] = NULL; clear_bit(k, sqp->in_use_bm); - spin_lock_irqsave(&sqp->qc_lock, iflags); } } spin_unlock_irqrestore(&sqp->qc_lock, iflags); } } -/* Free queued command memory on heap */ -static void free_all_queued(void) -{ - int j, k; - struct sdebug_queue *sqp; - struct sdebug_queued_cmd *sqcp; - - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { - for (k = 0; k < SDEBUG_CANQUEUE; ++k) { - sqcp = &sqp->qc_arr[k]; - kfree(sqcp->sd_dp); - sqcp->sd_dp = NULL; - } - } -} - static int scsi_debug_abort(struct scsi_cmnd *SCpnt) { - bool ok; + bool ok = scsi_debug_abort_cmnd(SCpnt); ++num_aborts; - ok = stop_queued_cmnd(SCpnt); if (SDEBUG_OPT_ALL_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, "%s: command%s found\n", __func__, @@ -5543,6 +5569,34 @@ static bool inject_on_this_cmd(void) #define INCLUSIVE_TIMING_MAX_NS 1000000 /* 1 millisecond */ + +void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp) +{ + if (sqcp) + kmem_cache_free(queued_cmd_cache, sqcp); +} + +static struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd) +{ + struct sdebug_queued_cmd *sqcp; + struct sdebug_defer *sd_dp; + + sqcp = kmem_cache_zalloc(queued_cmd_cache, GFP_ATOMIC); + if (!sqcp) + return NULL; + + sd_dp = &sqcp->sd_dp; + + hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + sd_dp->hrt.function = sdebug_q_cmd_hrt_complete; + INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete); + + sqcp->scmd = scmd; + sd_dp->sqa_idx = -1; + + return sqcp; +} + /* Complete the processing of the thread that queued a SCSI command to this * driver. It either completes the command by calling cmnd_done() or * schedules a hr timer or work queue then returns 0. Returns @@ -5554,15 +5608,16 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, struct sdebug_dev_info *), int delta_jiff, int ndelay) { - bool new_sd_dp; - bool polled = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_POLLED; - int k; - unsigned long iflags; + struct request *rq = scsi_cmd_to_rq(cmnd); + bool polled = rq->cmd_flags & REQ_POLLED; + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); + unsigned long iflags, flags; u64 ns_from_boot = 0; struct sdebug_queue *sqp; struct sdebug_queued_cmd *sqcp; struct scsi_device *sdp; struct sdebug_defer *sd_dp; + int k; if (unlikely(devip == NULL)) { if (scsi_result == 0) @@ -5606,22 +5661,17 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, goto respond_in_thread; } set_bit(k, sqp->in_use_bm); - sqcp = &sqp->qc_arr[k]; - sqcp->a_cmnd = cmnd; - cmnd->host_scribble = (unsigned char *)sqcp; - sd_dp = sqcp->sd_dp; - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (!sd_dp) { - sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC); - if (!sd_dp) { - clear_bit(k, sqp->in_use_bm); - return SCSI_MLQUEUE_HOST_BUSY; - } - new_sd_dp = true; - } else { - new_sd_dp = false; + sqcp = sdebug_alloc_queued_cmd(cmnd); + if (!sqcp) { + clear_bit(k, sqp->in_use_bm); + spin_unlock_irqrestore(&sqp->qc_lock, iflags); + return SCSI_MLQUEUE_HOST_BUSY; } + sd_dp = &sqcp->sd_dp; + sd_dp->sqa_idx = k; + sqp->qc_arr[k] = sqcp; + spin_unlock_irqrestore(&sqp->qc_lock, iflags); /* Set the hostwide tag */ if (sdebug_host_max_queue) @@ -5673,12 +5723,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (kt <= d) { /* elapsed duration >= kt */ spin_lock_irqsave(&sqp->qc_lock, iflags); - sqcp->a_cmnd = NULL; + sqp->qc_arr[k] = NULL; clear_bit(k, sqp->in_use_bm); spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (new_sd_dp) - kfree(sd_dp); /* call scsi_done() from this thread */ + sdebug_free_queued_cmd(sqcp); scsi_done(cmnd); return 0; } @@ -5686,33 +5735,28 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, kt -= d; } } - if (polled) { - sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt); - spin_lock_irqsave(&sqp->qc_lock, iflags); - if (!sd_dp->init_poll) { - sd_dp->init_poll = true; - sqcp->sd_dp = sd_dp; - sd_dp->sqa_idx = sqp - sdebug_q_arr; - sd_dp->qc_idx = k; - } - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - } else { - if (!sd_dp->init_hrt) { - sd_dp->init_hrt = true; - sqcp->sd_dp = sd_dp; - hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED); - sd_dp->hrt.function = sdebug_q_cmd_hrt_complete; - sd_dp->sqa_idx = sqp - sdebug_q_arr; - sd_dp->qc_idx = k; - } - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_HRT); - /* schedule the invocation of scsi_done() for a later time */ - hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED); - } if (sdebug_statistics) sd_dp->issuing_cpu = raw_smp_processor_id(); + if (polled) { + spin_lock_irqsave(&sdsc->lock, flags); + sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt); + ASSIGN_QUEUED_CMD(cmnd, sqcp); + WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); + spin_unlock_irqrestore(&sdsc->lock, flags); + } else { + /* schedule the invocation of scsi_done() for a later time */ + spin_lock_irqsave(&sdsc->lock, flags); + ASSIGN_QUEUED_CMD(cmnd, sqcp); + WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_HRT); + hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED); + /* + * The completion handler will try to grab sqcp->lock, + * so there is no chance that the completion handler + * will call scsi_done() until we release the lock + * here (so ok to keep referencing sdsc). + */ + spin_unlock_irqrestore(&sdsc->lock, flags); + } } else { /* jdelay < 0, use work queue */ if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) && atomic_read(&sdeb_inject_pending))) { @@ -5722,30 +5766,21 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, blk_mq_unique_tag_to_tag(get_tag(cmnd))); } - if (polled) { - sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot); - spin_lock_irqsave(&sqp->qc_lock, iflags); - if (!sd_dp->init_poll) { - sd_dp->init_poll = true; - sqcp->sd_dp = sd_dp; - sd_dp->sqa_idx = sqp - sdebug_q_arr; - sd_dp->qc_idx = k; - } - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - } else { - if (!sd_dp->init_wq) { - sd_dp->init_wq = true; - sqcp->sd_dp = sd_dp; - sd_dp->sqa_idx = sqp - sdebug_q_arr; - sd_dp->qc_idx = k; - INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete); - } - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_WQ); - schedule_work(&sd_dp->ew.work); - } if (sdebug_statistics) sd_dp->issuing_cpu = raw_smp_processor_id(); + if (polled) { + spin_lock_irqsave(&sdsc->lock, flags); + ASSIGN_QUEUED_CMD(cmnd, sqcp); + sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot); + WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); + spin_unlock_irqrestore(&sdsc->lock, flags); + } else { + spin_lock_irqsave(&sdsc->lock, flags); + ASSIGN_QUEUED_CMD(cmnd, sqcp); + WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_WQ); + schedule_work(&sd_dp->ew.work); + spin_unlock_irqrestore(&sdsc->lock, flags); + } } return 0; @@ -7066,6 +7101,10 @@ static int __init scsi_debug_init(void) hosts_to_add = sdebug_add_host; sdebug_add_host = 0; + queued_cmd_cache = KMEM_CACHE(sdebug_queued_cmd, SLAB_HWCACHE_ALIGN); + if (!queued_cmd_cache) + goto driver_unreg; + for (k = 0; k < hosts_to_add; k++) { if (want_store && k == 0) { ret = sdebug_add_host_helper(idx); @@ -7088,6 +7127,8 @@ static int __init scsi_debug_init(void) return 0; +driver_unreg: + driver_unregister(&sdebug_driverfs_driver); bus_unreg: bus_unregister(&pseudo_lld_bus); dev_unreg: @@ -7103,10 +7144,9 @@ static void __exit scsi_debug_exit(void) { int k = sdebug_num_hosts; - stop_all_queued(); for (; k; k--) sdebug_do_remove_host(true); - free_all_queued(); + kmem_cache_destroy(queued_cmd_cache); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); @@ -7493,6 +7533,8 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) goto unlock; for (first = true; first || qc_idx + 1 < sdebug_max_queue; ) { + unsigned long flags; + struct sdebug_scsi_cmd *sdsc; if (first) { first = false; if (!test_bit(qc_idx, sqp->in_use_bm)) @@ -7503,37 +7545,60 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) if (qc_idx >= sdebug_max_queue) break; - sqcp = &sqp->qc_arr[qc_idx]; - sd_dp = sqcp->sd_dp; - if (unlikely(!sd_dp)) - continue; - scp = sqcp->a_cmnd; + sqcp = sqp->qc_arr[qc_idx]; + if (!sqcp) { + pr_err("sqcp is NULL, queue_num=%d, qc_idx=%u from %s\n", + queue_num, qc_idx, __func__); + break; + } + sd_dp = &sqcp->sd_dp; + + scp = sqcp->scmd; if (unlikely(scp == NULL)) { pr_err("scp is NULL, queue_num=%d, qc_idx=%u from %s\n", queue_num, qc_idx, __func__); break; } + sdsc = scsi_cmd_priv(scp); + spin_lock_irqsave(&sdsc->lock, flags); if (READ_ONCE(sd_dp->defer_t) == SDEB_DEFER_POLL) { - if (kt_from_boot < sd_dp->cmpl_ts) - continue; + struct sdebug_queued_cmd *_sqcp = TO_QUEUED_CMD(scp); - } else /* ignoring non REQ_POLLED requests */ + if (_sqcp != sqcp) { + pr_err("inconsistent queued cmd tag=%#x\n", + blk_mq_unique_tag(scsi_cmd_to_rq(scp))); + spin_unlock_irqrestore(&sdsc->lock, flags); + continue; + } + + if (kt_from_boot < sd_dp->cmpl_ts) { + spin_unlock_irqrestore(&sdsc->lock, flags); + continue; + } + + } else /* ignoring non REQ_POLLED requests */ { + spin_unlock_irqrestore(&sdsc->lock, flags); continue; + } if (unlikely(atomic_read(&retired_max_queue) > 0)) retiring = true; - sqcp->a_cmnd = NULL; if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { + spin_unlock_irqrestore(&sdsc->lock, flags); pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u from %s\n", sqp, queue_num, qc_idx, __func__); + sdebug_free_queued_cmd(sqcp); break; } + sqp->qc_arr[qc_idx] = NULL; if (unlikely(retiring)) { /* user has reduced max_queue */ int k, retval; retval = atomic_read(&retired_max_queue); if (qc_idx >= retval) { pr_err("index %d too large\n", retval); + spin_unlock_irqrestore(&sdsc->lock, flags); + sdebug_free_queued_cmd(sqcp); break; } k = find_last_bit(sqp->in_use_bm, retval); @@ -7542,7 +7607,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) else atomic_set(&retired_max_queue, k + 1); } - WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE); + spin_unlock_irqrestore(&sdsc->lock, flags); spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (sdebug_statistics) { @@ -7551,6 +7616,8 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) atomic_inc(&sdebug_miss_cpus); } + sdebug_free_queued_cmd(sqcp); + scsi_done(scp); /* callback to mid level */ num_entries++; spin_lock_irqsave(&sqp->qc_lock, iflags); @@ -7733,6 +7800,16 @@ err_out: return schedule_resp(scp, NULL, DID_NO_CONNECT << 16, NULL, 0, 0); } +static int sdebug_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmd); + + spin_lock_init(&sdsc->lock); + + return 0; +} + + static struct scsi_host_template sdebug_driver_template = { .show_info = scsi_debug_show_info, .write_info = scsi_debug_write_info, @@ -7760,6 +7837,8 @@ static struct scsi_host_template sdebug_driver_template = { .max_segment_size = -1U, .module = THIS_MODULE, .track_queue_depth = 1, + .cmd_size = sizeof(struct sdebug_scsi_cmd), + .init_cmd_priv = sdebug_init_cmd_priv, }; static int sdebug_driver_probe(struct device *dev) From 600d9ead3936b2f22e664c59345a2e006ff324c5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:06 +0000 Subject: [PATCH 07/11] scsi: scsi_debug: Use blk_mq_tagset_busy_iter() in sdebug_blk_mq_poll() Instead of iterating all deferred commands in the submission queue structures, use blk_mq_tagset_busy_iter(), which is a standard API for this. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-8-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 213 +++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 106 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 1c85cbd92178..4ec94ba593e8 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7511,123 +7511,124 @@ static void sdebug_map_queues(struct Scsi_Host *shost) } } +struct sdebug_blk_mq_poll_data { + unsigned int queue_num; + int *num_entries; +}; + +/* + * We don't handle aborted commands here, but it does not seem possible to have + * aborted polled commands from schedule_resp() + */ +static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) +{ + struct sdebug_blk_mq_poll_data *data = opaque; + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmd); + struct sdebug_defer *sd_dp; + u32 unique_tag = blk_mq_unique_tag(rq); + u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag); + struct sdebug_queued_cmd *sqcp; + struct sdebug_queue *sqp; + unsigned long flags; + int queue_num = data->queue_num; + bool retiring = false; + int qc_idx; + ktime_t time; + + /* We're only interested in one queue for this iteration */ + if (hwq != queue_num) + return true; + + /* Subsequent checks would fail if this failed, but check anyway */ + if (!test_bit(SCMD_STATE_INFLIGHT, &cmd->state)) + return true; + + time = ktime_get_boottime(); + + spin_lock_irqsave(&sdsc->lock, flags); + sqcp = TO_QUEUED_CMD(cmd); + if (!sqcp) { + spin_unlock_irqrestore(&sdsc->lock, flags); + return true; + } + + sqp = sdebug_q_arr + queue_num; + sd_dp = &sqcp->sd_dp; + + if (READ_ONCE(sd_dp->defer_t) != SDEB_DEFER_POLL) { + spin_unlock_irqrestore(&sdsc->lock, flags); + return true; + } + + if (time < sd_dp->cmpl_ts) { + spin_unlock_irqrestore(&sdsc->lock, flags); + return true; + } + + if (unlikely(atomic_read(&retired_max_queue) > 0)) + retiring = true; + + qc_idx = sd_dp->sqa_idx; + sqp->qc_arr[qc_idx] = NULL; + if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { + spin_unlock_irqrestore(&sdsc->lock, flags); + pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u\n", + sqp, queue_num, qc_idx); + sdebug_free_queued_cmd(sqcp); + return true; + } + + if (unlikely(retiring)) { /* user has reduced max_queue */ + int k, retval = atomic_read(&retired_max_queue); + + if (qc_idx >= retval) { + pr_err("index %d too large\n", retval); + spin_unlock_irqrestore(&sdsc->lock, flags); + sdebug_free_queued_cmd(sqcp); + return true; + } + + k = find_last_bit(sqp->in_use_bm, retval); + if ((k < sdebug_max_queue) || (k == retval)) + atomic_set(&retired_max_queue, 0); + else + atomic_set(&retired_max_queue, k + 1); + } + + ASSIGN_QUEUED_CMD(cmd, NULL); + spin_unlock_irqrestore(&sdsc->lock, flags); + + if (sdebug_statistics) { + atomic_inc(&sdebug_completions); + if (raw_smp_processor_id() != sd_dp->issuing_cpu) + atomic_inc(&sdebug_miss_cpus); + } + + sdebug_free_queued_cmd(sqcp); + + scsi_done(cmd); /* callback to mid level */ + (*data->num_entries)++; + return true; +} + static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) { - bool first; - bool retiring = false; int num_entries = 0; - unsigned int qc_idx = 0; unsigned long iflags; - ktime_t kt_from_boot = ktime_get_boottime(); struct sdebug_queue *sqp; - struct sdebug_queued_cmd *sqcp; - struct scsi_cmnd *scp; - struct sdebug_defer *sd_dp; - + struct sdebug_blk_mq_poll_data data = { + .queue_num = queue_num, + .num_entries = &num_entries, + }; sqp = sdebug_q_arr + queue_num; spin_lock_irqsave(&sqp->qc_lock, iflags); - qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue); - if (qc_idx >= sdebug_max_queue) - goto unlock; + blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_blk_mq_poll_iter, + &data); - for (first = true; first || qc_idx + 1 < sdebug_max_queue; ) { - unsigned long flags; - struct sdebug_scsi_cmd *sdsc; - if (first) { - first = false; - if (!test_bit(qc_idx, sqp->in_use_bm)) - continue; - } else { - qc_idx = find_next_bit(sqp->in_use_bm, sdebug_max_queue, qc_idx + 1); - } - if (qc_idx >= sdebug_max_queue) - break; - - sqcp = sqp->qc_arr[qc_idx]; - if (!sqcp) { - pr_err("sqcp is NULL, queue_num=%d, qc_idx=%u from %s\n", - queue_num, qc_idx, __func__); - break; - } - sd_dp = &sqcp->sd_dp; - - scp = sqcp->scmd; - if (unlikely(scp == NULL)) { - pr_err("scp is NULL, queue_num=%d, qc_idx=%u from %s\n", - queue_num, qc_idx, __func__); - break; - } - sdsc = scsi_cmd_priv(scp); - spin_lock_irqsave(&sdsc->lock, flags); - if (READ_ONCE(sd_dp->defer_t) == SDEB_DEFER_POLL) { - struct sdebug_queued_cmd *_sqcp = TO_QUEUED_CMD(scp); - - if (_sqcp != sqcp) { - pr_err("inconsistent queued cmd tag=%#x\n", - blk_mq_unique_tag(scsi_cmd_to_rq(scp))); - spin_unlock_irqrestore(&sdsc->lock, flags); - continue; - } - - if (kt_from_boot < sd_dp->cmpl_ts) { - spin_unlock_irqrestore(&sdsc->lock, flags); - continue; - } - - } else /* ignoring non REQ_POLLED requests */ { - spin_unlock_irqrestore(&sdsc->lock, flags); - continue; - } - if (unlikely(atomic_read(&retired_max_queue) > 0)) - retiring = true; - - if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { - spin_unlock_irqrestore(&sdsc->lock, flags); - pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u from %s\n", - sqp, queue_num, qc_idx, __func__); - sdebug_free_queued_cmd(sqcp); - break; - } - sqp->qc_arr[qc_idx] = NULL; - if (unlikely(retiring)) { /* user has reduced max_queue */ - int k, retval; - - retval = atomic_read(&retired_max_queue); - if (qc_idx >= retval) { - pr_err("index %d too large\n", retval); - spin_unlock_irqrestore(&sdsc->lock, flags); - sdebug_free_queued_cmd(sqcp); - break; - } - k = find_last_bit(sqp->in_use_bm, retval); - if ((k < sdebug_max_queue) || (k == retval)) - atomic_set(&retired_max_queue, 0); - else - atomic_set(&retired_max_queue, k + 1); - } - spin_unlock_irqrestore(&sdsc->lock, flags); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - - if (sdebug_statistics) { - atomic_inc(&sdebug_completions); - if (raw_smp_processor_id() != sd_dp->issuing_cpu) - atomic_inc(&sdebug_miss_cpus); - } - - sdebug_free_queued_cmd(sqcp); - - scsi_done(scp); /* callback to mid level */ - num_entries++; - spin_lock_irqsave(&sqp->qc_lock, iflags); - if (find_first_bit(sqp->in_use_bm, sdebug_max_queue) >= sdebug_max_queue) - break; - } - -unlock: spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (num_entries > 0) atomic_add(num_entries, &sdeb_mq_poll_count); return num_entries; From 9c559c9b4748fed11687694e65e5d6d1eb2919cd Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:07 +0000 Subject: [PATCH 08/11] scsi: scsi_debug: Use blk_mq_tagset_busy_iter() in stop_all_queued() Instead of iterating all deferred commands in the submission queue structures, use blk_mq_tagset_busy_iter(), which is a standard API for this. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-9-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 45 +++++++++++++++------------------------ 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 4ec94ba593e8..9e1586b127bc 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5326,40 +5326,29 @@ static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd) return res; } +/* + * All we can do is set the cmnd as internally aborted and wait for it to + * finish. We cannot call scsi_done() as normal completion path may do that. + */ +static bool sdebug_stop_cmnd(struct request *rq, void *data) +{ + scsi_debug_abort_cmnd(blk_mq_rq_to_pdu(rq)); + + return true; +} + /* Deletes (stops) timers or work queues of all queued commands */ static void stop_all_queued(void) { - unsigned long iflags, flags; - int j, k; - struct sdebug_queue *sqp; + struct sdebug_host_info *sdhp; - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { - spin_lock_irqsave(&sqp->qc_lock, iflags); - for (k = 0; k < SDEBUG_CANQUEUE; ++k) { - if (test_bit(k, sqp->in_use_bm)) { - struct sdebug_queued_cmd *sqcp = sqp->qc_arr[k]; - struct sdebug_scsi_cmd *sdsc; - struct scsi_cmnd *scmd; + mutex_lock(&sdebug_host_list_mutex); + list_for_each_entry(sdhp, &sdebug_host_list, host_list) { + struct Scsi_Host *shost = sdhp->shost; - if (!sqcp) - continue; - scmd = sqcp->scmd; - if (!scmd) - continue; - sdsc = scsi_cmd_priv(scmd); - spin_lock_irqsave(&sdsc->lock, flags); - if (TO_QUEUED_CMD(scmd) != sqcp) { - spin_unlock_irqrestore(&sdsc->lock, flags); - continue; - } - scsi_debug_stop_cmnd(scmd, NULL); - spin_unlock_irqrestore(&sdsc->lock, flags); - sqp->qc_arr[k] = NULL; - clear_bit(k, sqp->in_use_bm); - } - } - spin_unlock_irqrestore(&sqp->qc_lock, iflags); + blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_stop_cmnd, NULL); } + mutex_unlock(&sdebug_host_list_mutex); } static int scsi_debug_abort(struct scsi_cmnd *SCpnt) From 12f3eef016ea7a72c6e0d0fe6f66882086d9c4a9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:08 +0000 Subject: [PATCH 09/11] scsi: scsi_debug: Use scsi_host_busy() in delay_store() and ndelay_store() The functions to update ndelay and delay value first check whether we have any in-flight IO for any host. It does this by checking if any tag is used in the global submit queues. We can achieve the same by setting the host as blocked and then ensuring that we have no in-flight commands with scsi_host_busy(). Note that scsi_host_busy() checks SCMD_STATE_INFLIGHT flag, which is only set per command after we ensure that the host is not blocked, i.e. we see more commands active after the check for scsi_host_busy() returns 0. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-10-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 9e1586b127bc..9a741b1983ca 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6062,16 +6062,15 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, if (count > 0 && sscanf(buf, "%d", &jdelay) == 1) { res = count; if (sdebug_jdelay != jdelay) { - int j, k; - struct sdebug_queue *sqp; + struct sdebug_host_info *sdhp; mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; - ++j, ++sqp) { - k = find_first_bit(sqp->in_use_bm, - sdebug_max_queue); - if (k != sdebug_max_queue) { + + list_for_each_entry(sdhp, &sdebug_host_list, host_list) { + struct Scsi_Host *shost = sdhp->shost; + + if (scsi_host_busy(shost)) { res = -EBUSY; /* queued commands */ break; } @@ -6104,20 +6103,20 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, (ndelay >= 0) && (ndelay < (1000 * 1000 * 1000))) { res = count; if (sdebug_ndelay != ndelay) { - int j, k; - struct sdebug_queue *sqp; + struct sdebug_host_info *sdhp; mutex_lock(&sdebug_host_list_mutex); block_unblock_all_queues(true); - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; - ++j, ++sqp) { - k = find_first_bit(sqp->in_use_bm, - sdebug_max_queue); - if (k != sdebug_max_queue) { + + list_for_each_entry(sdhp, &sdebug_host_list, host_list) { + struct Scsi_Host *shost = sdhp->shost; + + if (scsi_host_busy(shost)) { res = -EBUSY; /* queued commands */ break; } } + if (res > 0) { sdebug_ndelay = ndelay; sdebug_jdelay = ndelay ? JDELAY_OVERRIDDEN From 57f7225a4fe25425c29402adad990c7409958c40 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:09 +0000 Subject: [PATCH 10/11] scsi: scsi_debug: Only allow sdebug_max_queue be modified when no shosts The shost->can_queue value is initially used to set per-HW queue context tag depth in the block layer. This ensures that the shost is not sent too many commands which it can deal with. However lowering sdebug_max_queue separately means that we can easily overload the shost, as in the following example: $ cat /sys/bus/pseudo/drivers/scsi_debug/max_queue 192 $ cat /sys/class/scsi_host/host0/can_queue 192 $ echo 100 > /sys/bus/pseudo/drivers/scsi_debug/max_queue $ cat /sys/class/scsi_host/host0/can_queue 192 $ fio --filename=/dev/sda --direct=1 --rw=read --bs=4k --iodepth=256 --runtime=1200 --numjobs=10 --time_based --group_reporting --name=iops-test-job --eta-newline=1 --readonly --ioengine=io_uring --hipri --exitall_on_error iops-test-job: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=io_uring, iodepth=256 ... fio-3.28 Starting 10 processes [ 111.269885] scsi_io_completion_action: 400 callbacks suppressed [ 111.269885] blk_print_req_error: 400 callbacks suppressed [ 111.269889] I/O error, dev sda, sector 440 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2 [ 111.269892] sd 0:0:0:0: [sda] tag#132 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s [ 111.269897] sd 0:0:0:0: [sda] tag#132 CDB: Read(10) 28 00 00 00 01 68 00 00 08 00 [ 111.277058] I/O error, dev sda, sector 360 op 0x0:(READ) flags 0x1200000 phys_seg 1 prio class 2 [...] Ensure that this cannot happen by allowing sdebug_max_queue be modified only when we have no shosts. As such, any shost->can_queue value will match sdebug_max_queue, and sdebug_max_queue cannot be modified separately. Since retired_max_queue is no longer set, remove support. Continue to apply the restriction that sdebug_host_max_queue cannot be modified when sdebug_host_max_queue is set. Adding support for that would mean extra code, and no one has complained about this restriction previously. A command like the following may be used to remove a shost: echo -1 > /sys/bus/pseudo/drivers/scsi_debug/add_host Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-11-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 67 ++++----------------------------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 9a741b1983ca..66b0c831da3b 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -762,7 +762,6 @@ static int sdebug_max_luns = DEF_MAX_LUNS; static int sdebug_max_queue = SDEBUG_CANQUEUE; /* per submit queue */ static unsigned int sdebug_medium_error_start = OPT_MEDIUM_ERR_ADDR; static int sdebug_medium_error_count = OPT_MEDIUM_ERR_NUM; -static atomic_t retired_max_queue; /* if > 0 then was prior max_queue */ static int sdebug_ndelay = DEF_NDELAY; /* if > 0 then unit is nanoseconds */ static int sdebug_no_lun_0 = DEF_NO_LUN_0; static int sdebug_no_uld; @@ -4928,7 +4927,6 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) { struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp); int qc_idx; - int retiring = 0; unsigned long flags, iflags; struct scsi_cmnd *scp = sqcp->scmd; struct sdebug_scsi_cmd *sdsc; @@ -4959,9 +4957,6 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) sd_dp->aborted = false; ASSIGN_QUEUED_CMD(scp, NULL); - if (unlikely(atomic_read(&retired_max_queue) > 0)) - retiring = 1; - sqp->qc_arr[qc_idx] = NULL; if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { spin_unlock_irqrestore(&sdsc->lock, flags); @@ -4970,23 +4965,6 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) goto out; } - if (unlikely(retiring)) { /* user has reduced max_queue */ - int k, retval; - - retval = atomic_read(&retired_max_queue); - if (qc_idx >= retval) { - spin_unlock_irqrestore(&sdsc->lock, flags); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - pr_err("index %d too large\n", retval); - goto out; - } - k = find_last_bit(sqp->in_use_bm, retval); - if ((k < sdebug_max_queue) || (k == retval)) - atomic_set(&retired_max_queue, 0); - else - atomic_set(&retired_max_queue, k + 1); - } - spin_unlock_irqrestore(&sdsc->lock, flags); spin_unlock_irqrestore(&sqp->qc_lock, iflags); @@ -6431,29 +6409,18 @@ static ssize_t max_queue_show(struct device_driver *ddp, char *buf) static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, size_t count) { - int j, n, k, a; - struct sdebug_queue *sqp; + int n; if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) && (n <= SDEBUG_CANQUEUE) && (sdebug_host_max_queue == 0)) { mutex_lock(&sdebug_host_list_mutex); - block_unblock_all_queues(true); - k = 0; - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; - ++j, ++sqp) { - a = find_last_bit(sqp->in_use_bm, SDEBUG_CANQUEUE); - if (a > k) - k = a; - } - sdebug_max_queue = n; - if (k == SDEBUG_CANQUEUE) - atomic_set(&retired_max_queue, 0); - else if (k >= n) - atomic_set(&retired_max_queue, k + 1); + + /* We may only change sdebug_max_queue when we have no shosts */ + if (list_empty(&sdebug_host_list)) + sdebug_max_queue = n; else - atomic_set(&retired_max_queue, 0); - block_unblock_all_queues(false); + count = -EBUSY; mutex_unlock(&sdebug_host_list_mutex); return count; } @@ -6882,7 +6849,6 @@ static int __init scsi_debug_init(void) ramdisk_lck_a[0] = &atomic_rw; ramdisk_lck_a[1] = &atomic_rw2; - atomic_set(&retired_max_queue, 0); if (sdebug_ndelay >= 1000 * 1000 * 1000) { pr_warn("ndelay must be less than 1 second, ignored\n"); @@ -7520,7 +7486,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) struct sdebug_queue *sqp; unsigned long flags; int queue_num = data->queue_num; - bool retiring = false; int qc_idx; ktime_t time; @@ -7554,9 +7519,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) return true; } - if (unlikely(atomic_read(&retired_max_queue) > 0)) - retiring = true; - qc_idx = sd_dp->sqa_idx; sqp->qc_arr[qc_idx] = NULL; if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { @@ -7567,23 +7529,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) return true; } - if (unlikely(retiring)) { /* user has reduced max_queue */ - int k, retval = atomic_read(&retired_max_queue); - - if (qc_idx >= retval) { - pr_err("index %d too large\n", retval); - spin_unlock_irqrestore(&sdsc->lock, flags); - sdebug_free_queued_cmd(sqcp); - return true; - } - - k = find_last_bit(sqp->in_use_bm, retval); - if ((k < sdebug_max_queue) || (k == retval)) - atomic_set(&retired_max_queue, 0); - else - atomic_set(&retired_max_queue, k + 1); - } - ASSIGN_QUEUED_CMD(cmd, NULL); spin_unlock_irqrestore(&sdsc->lock, flags); From f1437cd1e535c5d5cc9f6e5bfdfc9b1cd3141bc4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 27 Mar 2023 07:43:10 +0000 Subject: [PATCH 11/11] scsi: scsi_debug: Drop sdebug_queue It's easy to get scsi_debug to error on throughput testing when we have multiple shosts: $ lsscsi [7:0:0:0] disk Linux scsi_debug 0191 [0:0:0:0] disk Linux scsi_debug 0191 $ fio --filename=/dev/sda --filename=/dev/sdb --direct=1 --rw=read --bs=4k --iodepth=256 --runtime=60 --numjobs=40 --time_based --name=jpg --eta-newline=1 --readonly --ioengine=io_uring --hipri --exitall_on_error jpg: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=io_uring, iodepth=256 ... fio-3.28 Starting 40 processes [ 27.521809] hrtimer: interrupt took 33067 ns [ 27.904660] sd 7:0:0:0: [sdb] tag#171 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s [ 27.904660] sd 0:0:0:0: [sda] tag#58 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s fio: io_u error [ 27.904667] sd 0:0:0:0: [sda] tag#58 CDB: Read(10) 28 00 00 00 27 00 00 01 18 00 on file /dev/sda[ 27.904670] sd 0:0:0:0: [sda] tag#62 FAILED Result: hostbyte=DID_ABORT driverbyte=DRIVER_OK cmd_age=0s The issue is related to how the driver manages submit queues and tags. A single array of submit queues - sdebug_q_arr - with its own set of tags is shared among all shosts. As such, for occasions when we have more than one shost it is possible to overload the submit queues and run out of tags. The struct sdebug_queue is to manage tags and hold the associated queued command entry pointer (for that tag). Since the tagset iters are now used for functions like sdebug_blk_mq_poll(), there is no need to manage these queues. Indeed, blk-mq already provides what we need for managing tags and queues. Drop sdebug_queue and all its usage in the driver. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20230327074310.1862889-12-john.g.garry@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 189 ++++++++++---------------------------- 1 file changed, 51 insertions(+), 138 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 66b0c831da3b..cac52bb43f1b 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -341,8 +341,6 @@ struct sdebug_defer { struct hrtimer hrt; struct execute_work ew; ktime_t cmpl_ts;/* time since boot to complete this cmd */ - int sqa_idx; /* index of sdebug_queue array */ - int hc_idx; /* hostwide tag index */ int issuing_cpu; bool aborted; /* true when blk_abort_request() already called */ enum sdeb_defer_type defer_t; @@ -360,12 +358,6 @@ struct sdebug_scsi_cmd { spinlock_t lock; }; -struct sdebug_queue { - struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE]; - unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS]; - spinlock_t qc_lock; -}; - static atomic_t sdebug_cmnd_count; /* number of incoming commands */ static atomic_t sdebug_completions; /* count of deferred completions */ static atomic_t sdebug_miss_cpus; /* submission + completion cpus differ */ @@ -848,7 +840,6 @@ static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES; static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */ static int poll_queues; /* iouring iopoll interface.*/ -static struct sdebug_queue *sdebug_q_arr; /* ptr to array of submit queues */ static DEFINE_RWLOCK(atomic_rw); static DEFINE_RWLOCK(atomic_rw2); @@ -4903,20 +4894,6 @@ fini: return res; } -static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd) -{ - u16 hwq; - u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)); - - hwq = blk_mq_unique_tag_to_hwq(tag); - - pr_debug("tag=%#x, hwq=%d\n", tag, hwq); - if (WARN_ON_ONCE(hwq >= submit_queues)) - hwq = 0; - - return sdebug_q_arr + hwq; -} - static u32 get_tag(struct scsi_cmnd *cmnd) { return blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)); @@ -4926,47 +4903,30 @@ static u32 get_tag(struct scsi_cmnd *cmnd) static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) { struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp); - int qc_idx; - unsigned long flags, iflags; + unsigned long flags; struct scsi_cmnd *scp = sqcp->scmd; struct sdebug_scsi_cmd *sdsc; bool aborted; - struct sdebug_queue *sqp; - qc_idx = sd_dp->sqa_idx; if (sdebug_statistics) { atomic_inc(&sdebug_completions); if (raw_smp_processor_id() != sd_dp->issuing_cpu) atomic_inc(&sdebug_miss_cpus); } + if (!scp) { pr_err("scmd=NULL\n"); goto out; } - if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) { - pr_err("wild qc_idx=%d\n", qc_idx); - goto out; - } sdsc = scsi_cmd_priv(scp); - sqp = get_queue(scp); - spin_lock_irqsave(&sqp->qc_lock, iflags); spin_lock_irqsave(&sdsc->lock, flags); aborted = sd_dp->aborted; if (unlikely(aborted)) sd_dp->aborted = false; ASSIGN_QUEUED_CMD(scp, NULL); - sqp->qc_arr[qc_idx] = NULL; - if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { - spin_unlock_irqrestore(&sdsc->lock, flags); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - pr_err("Unexpected completion qc_idx=%d\n", qc_idx); - goto out; - } - spin_unlock_irqrestore(&sdsc->lock, flags); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (aborted) { pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n"); @@ -5255,21 +5215,18 @@ static bool stop_qc_helper(struct sdebug_defer *sd_dp, } -static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx) +static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd) { enum sdeb_defer_type l_defer_t; - struct sdebug_queued_cmd *sqcp; struct sdebug_defer *sd_dp; struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); + struct sdebug_queued_cmd *sqcp = TO_QUEUED_CMD(cmnd); lockdep_assert_held(&sdsc->lock); - sqcp = TO_QUEUED_CMD(cmnd); if (!sqcp) return false; sd_dp = &sqcp->sd_dp; - if (sqa_idx) - *sqa_idx = sd_dp->sqa_idx; l_defer_t = READ_ONCE(sd_dp->defer_t); ASSIGN_QUEUED_CMD(cmnd, NULL); @@ -5285,22 +5242,13 @@ static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx) static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd) { struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); - struct sdebug_queue *sqp = get_queue(cmnd); - unsigned long flags, iflags; - int k = -1; + unsigned long flags; bool res; spin_lock_irqsave(&sdsc->lock, flags); - res = scsi_debug_stop_cmnd(cmnd, &k); + res = scsi_debug_stop_cmnd(cmnd); spin_unlock_irqrestore(&sdsc->lock, flags); - if (k >= 0) { - spin_lock_irqsave(&sqp->qc_lock, iflags); - clear_bit(k, sqp->in_use_bm); - sqp->qc_arr[k] = NULL; - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - } - return res; } @@ -5559,7 +5507,6 @@ static struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd) INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete); sqcp->scmd = scmd; - sd_dp->sqa_idx = -1; return sqcp; } @@ -5578,13 +5525,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, struct request *rq = scsi_cmd_to_rq(cmnd); bool polled = rq->cmd_flags & REQ_POLLED; struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); - unsigned long iflags, flags; + unsigned long flags; u64 ns_from_boot = 0; - struct sdebug_queue *sqp; struct sdebug_queued_cmd *sqcp; struct scsi_device *sdp; struct sdebug_defer *sd_dp; - int k; if (unlikely(devip == NULL)) { if (scsi_result == 0) @@ -5596,8 +5541,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (delta_jiff == 0) goto respond_in_thread; - sqp = get_queue(cmnd); - spin_lock_irqsave(&sqp->qc_lock, iflags); if (unlikely(sdebug_every_nth && (SDEBUG_OPT_RARE_TSF & sdebug_opts) && (scsi_result == 0))) { @@ -5616,33 +5559,12 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } } - k = find_first_zero_bit(sqp->in_use_bm, sdebug_max_queue); - if (unlikely(k >= sdebug_max_queue)) { - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (scsi_result) - goto respond_in_thread; - scsi_result = device_qfull_result; - if (SDEBUG_OPT_Q_NOISE & sdebug_opts) - sdev_printk(KERN_INFO, sdp, "%s: max_queue=%d exceeded: TASK SET FULL\n", - __func__, sdebug_max_queue); - goto respond_in_thread; - } - set_bit(k, sqp->in_use_bm); - sqcp = sdebug_alloc_queued_cmd(cmnd); if (!sqcp) { - clear_bit(k, sqp->in_use_bm); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); + pr_err("%s no alloc\n", __func__); return SCSI_MLQUEUE_HOST_BUSY; } sd_dp = &sqcp->sd_dp; - sd_dp->sqa_idx = k; - sqp->qc_arr[k] = sqcp; - spin_unlock_irqrestore(&sqp->qc_lock, iflags); - - /* Set the hostwide tag */ - if (sdebug_host_max_queue) - sd_dp->hc_idx = get_tag(cmnd); if (polled) ns_from_boot = ktime_get_boottime_ns(); @@ -5689,10 +5611,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, u64 d = ktime_get_boottime_ns() - ns_from_boot; if (kt <= d) { /* elapsed duration >= kt */ - spin_lock_irqsave(&sqp->qc_lock, iflags); - sqp->qc_arr[k] = NULL; - clear_bit(k, sqp->in_use_bm); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); /* call scsi_done() from this thread */ sdebug_free_queued_cmd(sqcp); scsi_done(cmnd); @@ -5950,14 +5868,39 @@ static int scsi_debug_write_info(struct Scsi_Host *host, char *buffer, return length; } +struct sdebug_submit_queue_data { + int *first; + int *last; + int queue_num; +}; + +static bool sdebug_submit_queue_iter(struct request *rq, void *opaque) +{ + struct sdebug_submit_queue_data *data = opaque; + u32 unique_tag = blk_mq_unique_tag(rq); + u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag); + u16 tag = blk_mq_unique_tag_to_tag(unique_tag); + int queue_num = data->queue_num; + + if (hwq != queue_num) + return true; + + /* Rely on iter'ing in ascending tag order */ + if (*data->first == -1) + *data->first = *data->last = tag; + else + *data->last = tag; + + return true; +} + /* Output seen with 'cat /proc/scsi/scsi_debug/'. It will be the * same for each scsi_debug host (if more than one). Some of the counters * output are not atomics so might be inaccurate in a busy system. */ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host) { - int f, j, l; - struct sdebug_queue *sqp; struct sdebug_host_info *sdhp; + int j; seq_printf(m, "scsi_debug adapter driver, version %s [%s]\n", SDEBUG_VERSION, sdebug_version_date); @@ -5985,11 +5928,17 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host) atomic_read(&sdeb_mq_poll_count)); seq_printf(m, "submit_queues=%d\n", submit_queues); - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { + for (j = 0; j < submit_queues; ++j) { + int f = -1, l = -1; + struct sdebug_submit_queue_data data = { + .queue_num = j, + .first = &f, + .last = &l, + }; seq_printf(m, " queue %d:\n", j); - f = find_first_bit(sqp->in_use_bm, sdebug_max_queue); - if (f != sdebug_max_queue) { - l = find_last_bit(sqp->in_use_bm, sdebug_max_queue); + blk_mq_tagset_busy_iter(&host->tag_set, sdebug_submit_queue_iter, + &data); + if (f >= 0) { seq_printf(m, " in_use_bm BUSY: %s: %d,%d\n", "first,last bits", f, l); } @@ -6944,13 +6893,6 @@ static int __init scsi_debug_init(void) sdebug_max_queue); } - sdebug_q_arr = kcalloc(submit_queues, sizeof(struct sdebug_queue), - GFP_KERNEL); - if (sdebug_q_arr == NULL) - return -ENOMEM; - for (k = 0; k < submit_queues; ++k) - spin_lock_init(&sdebug_q_arr[k].qc_lock); - /* * check for host managed zoned block device specified with * ptype=0x14 or zbc=XXX. @@ -6959,10 +6901,8 @@ static int __init scsi_debug_init(void) sdeb_zbc_model = BLK_ZONED_HM; } else if (sdeb_zbc_model_s && *sdeb_zbc_model_s) { k = sdeb_zbc_model_str(sdeb_zbc_model_s); - if (k < 0) { - ret = k; - goto free_q_arr; - } + if (k < 0) + return k; sdeb_zbc_model = k; switch (sdeb_zbc_model) { case BLK_ZONED_NONE: @@ -6974,8 +6914,7 @@ static int __init scsi_debug_init(void) break; default: pr_err("Invalid ZBC model\n"); - ret = -EINVAL; - goto free_q_arr; + return -EINVAL; } } if (sdeb_zbc_model != BLK_ZONED_NONE) { @@ -7022,17 +6961,14 @@ static int __init scsi_debug_init(void) sdebug_unmap_granularity <= sdebug_unmap_alignment) { pr_err("ERR: unmap_granularity <= unmap_alignment\n"); - ret = -EINVAL; - goto free_q_arr; + return -EINVAL; } } xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); if (want_store) { idx = sdebug_add_store(); - if (idx < 0) { - ret = idx; - goto free_q_arr; - } + if (idx < 0) + return idx; } pseudo_primary = root_device_register("pseudo_0"); @@ -7089,8 +7025,6 @@ dev_unreg: root_device_unregister(pseudo_primary); free_vm: sdebug_erase_store(idx, NULL); -free_q_arr: - kfree(sdebug_q_arr); return ret; } @@ -7107,7 +7041,6 @@ static void __exit scsi_debug_exit(void) sdebug_erase_all_stores(false); xa_destroy(per_store_ap); - kfree(sdebug_q_arr); } device_initcall(scsi_debug_init); @@ -7483,10 +7416,8 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) u32 unique_tag = blk_mq_unique_tag(rq); u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag); struct sdebug_queued_cmd *sqcp; - struct sdebug_queue *sqp; unsigned long flags; int queue_num = data->queue_num; - int qc_idx; ktime_t time; /* We're only interested in one queue for this iteration */ @@ -7506,9 +7437,7 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) return true; } - sqp = sdebug_q_arr + queue_num; sd_dp = &sqcp->sd_dp; - if (READ_ONCE(sd_dp->defer_t) != SDEB_DEFER_POLL) { spin_unlock_irqrestore(&sdsc->lock, flags); return true; @@ -7519,16 +7448,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) return true; } - qc_idx = sd_dp->sqa_idx; - sqp->qc_arr[qc_idx] = NULL; - if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) { - spin_unlock_irqrestore(&sdsc->lock, flags); - pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u\n", - sqp, queue_num, qc_idx); - sdebug_free_queued_cmd(sqcp); - return true; - } - ASSIGN_QUEUED_CMD(cmd, NULL); spin_unlock_irqrestore(&sdsc->lock, flags); @@ -7548,20 +7467,14 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) { int num_entries = 0; - unsigned long iflags; - struct sdebug_queue *sqp; struct sdebug_blk_mq_poll_data data = { .queue_num = queue_num, .num_entries = &num_entries, }; - sqp = sdebug_q_arr + queue_num; - - spin_lock_irqsave(&sqp->qc_lock, iflags); blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_blk_mq_poll_iter, &data); - spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (num_entries > 0) atomic_add(num_entries, &sdeb_mq_poll_count); return num_entries;