scsi: sd: Use the block layer zone append emulation

Set the request queue of a TYPE_ZBC device as needing zone append
emulation by setting the device queue max_zone_append_sectors limit to
0. This enables the block layer generic implementation provided by zone
write plugging. With this, the sd driver will never see a
REQ_OP_ZONE_APPEND request and the zone append emulation code
implemented in sd_zbc.c can be removed.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Tested-by: Dennis Maisenbacher <dennis.maisenbacher@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240408014128.205141-14-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Damien Le Moal 2024-04-08 10:41:13 +09:00 committed by Jens Axboe
parent f211268ed1
commit 1846f308d6
3 changed files with 10 additions and 352 deletions

View File

@ -1260,12 +1260,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
}
}
if (req_op(rq) == REQ_OP_ZONE_APPEND) {
ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
if (ret)
goto fail;
}
fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
dix = scsi_prot_sg_count(cmd);
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
@ -1348,7 +1342,6 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
return sd_setup_flush_cmnd(cmd);
case REQ_OP_READ:
case REQ_OP_WRITE:
case REQ_OP_ZONE_APPEND:
return sd_setup_read_write_cmnd(cmd);
case REQ_OP_ZONE_RESET:
return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
@ -3979,7 +3972,6 @@ static void scsi_disk_release(struct device *dev)
struct scsi_disk *sdkp = to_scsi_disk(dev);
ida_free(&sd_index_ida, sdkp->index);
sd_zbc_free_zone_info(sdkp);
put_device(&sdkp->device->sdev_gendev);
free_opal_dev(sdkp->opal_dev);

View File

@ -104,12 +104,6 @@ struct scsi_disk {
* between zone starting LBAs is constant.
*/
u32 zone_starting_lba_gran;
u32 *zones_wp_offset;
spinlock_t zones_wp_offset_lock;
u32 *rev_wp_offset;
struct mutex rev_mutex;
struct work_struct zone_wp_offset_work;
char *zone_wp_update_buf;
#endif
atomic_t openers;
sector_t capacity; /* size in logical blocks */
@ -245,7 +239,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
#ifdef CONFIG_BLK_DEV_ZONED
void sd_zbc_free_zone_info(struct scsi_disk *sdkp);
int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
@ -255,13 +248,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
unsigned int nr_blocks);
#else /* CONFIG_BLK_DEV_ZONED */
static inline void sd_zbc_free_zone_info(struct scsi_disk *sdkp) {}
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
{
return 0;
@ -285,13 +273,6 @@ static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
return good_bytes;
}
static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
sector_t *lba,
unsigned int nr_blocks)
{
return BLK_STS_TARGET;
}
#define sd_zbc_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */

View File

@ -23,36 +23,6 @@
#define CREATE_TRACE_POINTS
#include "sd_trace.h"
/**
* sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
* @zone: Zone for which to return the write pointer offset.
*
* Return: offset of the write pointer from the start of the zone.
*/
static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
{
if (zone->type == ZBC_ZONE_TYPE_CONV)
return 0;
switch (zone->cond) {
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
return zone->wp - zone->start;
case BLK_ZONE_COND_FULL:
return zone->len;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
default:
/*
* Offline and read-only zones do not have a valid
* write pointer. Use 0 as for an empty zone.
*/
return 0;
}
}
/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
@ -121,9 +91,6 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
if (ret)
return ret;
if (sdkp->rev_wp_offset)
sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
return 0;
}
@ -347,123 +314,6 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
return BLK_STS_OK;
}
#define SD_ZBC_INVALID_WP_OFST (~0u)
#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
struct scsi_disk *sdkp = data;
lockdep_assert_held(&sdkp->zones_wp_offset_lock);
sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
return 0;
}
/*
* An attempt to append a zone triggered an invalid write pointer error.
* Reread the write pointer of the zone(s) in which the append failed.
*/
static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
{
struct scsi_disk *sdkp;
unsigned long flags;
sector_t zno;
int ret;
sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
continue;
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
SD_BUF_SIZE,
zno * sdkp->zone_info.zone_blocks, true);
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
if (!ret)
sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
zno, sd_zbc_update_wp_offset_cb,
sdkp);
}
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
scsi_device_put(sdkp->device);
}
/**
* sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
* @cmd: the command to setup
* @lba: the LBA to patch
* @nr_blocks: the number of LBAs to be written
*
* Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
* @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
* patching of the lba for an emulated ZONE_APPEND command.
*
* In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
* schedule a REPORT ZONES command and return BLK_STS_IOERR.
*/
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
unsigned int nr_blocks)
{
struct request *rq = scsi_cmd_to_rq(cmd);
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
unsigned int wp_offset, zno = blk_rq_zone_no(rq);
unsigned long flags;
blk_status_t ret;
ret = sd_zbc_cmnd_checks(cmd);
if (ret != BLK_STS_OK)
return ret;
if (!blk_rq_zone_is_seq(rq))
return BLK_STS_IOERR;
/* Unlock of the write lock will happen in sd_zbc_complete() */
if (!blk_req_zone_write_trylock(rq))
return BLK_STS_ZONE_RESOURCE;
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
wp_offset = sdkp->zones_wp_offset[zno];
switch (wp_offset) {
case SD_ZBC_INVALID_WP_OFST:
/*
* We are about to schedule work to update a zone write pointer
* offset, which will cause the zone append command to be
* requeued. So make sure that the scsi device does not go away
* while the work is being processed.
*/
if (scsi_device_get(sdkp->device)) {
ret = BLK_STS_IOERR;
break;
}
sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
schedule_work(&sdkp->zone_wp_offset_work);
fallthrough;
case SD_ZBC_UPDATING_WP_OFST:
ret = BLK_STS_DEV_RESOURCE;
break;
default:
wp_offset = sectors_to_logical(sdkp->device, wp_offset);
if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
ret = BLK_STS_IOERR;
break;
}
trace_scsi_prepare_zone_append(cmd, *lba, wp_offset);
*lba += wp_offset;
}
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
if (ret)
blk_req_zone_write_unlock(rq);
return ret;
}
/**
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
@ -504,96 +354,6 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
return BLK_STS_OK;
}
static bool sd_zbc_need_zone_wp_update(struct request *rq)
{
switch (req_op(rq)) {
case REQ_OP_ZONE_APPEND:
case REQ_OP_ZONE_FINISH:
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
return true;
case REQ_OP_WRITE:
case REQ_OP_WRITE_ZEROES:
return blk_rq_zone_is_seq(rq);
default:
return false;
}
}
/**
* sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
* @cmd: Completed command
* @good_bytes: Command reply bytes
*
* Called from sd_zbc_complete() to handle the update of the cached zone write
* pointer value in case an update is needed.
*/
static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
unsigned int good_bytes)
{
int result = cmd->result;
struct request *rq = scsi_cmd_to_rq(cmd);
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
unsigned int zno = blk_rq_zone_no(rq);
enum req_op op = req_op(rq);
unsigned long flags;
/*
* If we got an error for a command that needs updating the write
* pointer offset cache, we must mark the zone wp offset entry as
* invalid to force an update from disk the next time a zone append
* command is issued.
*/
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
if (result && op != REQ_OP_ZONE_RESET_ALL) {
if (op == REQ_OP_ZONE_APPEND) {
/* Force complete completion (no retry) */
good_bytes = 0;
scsi_set_resid(cmd, blk_rq_bytes(rq));
}
/*
* Force an update of the zone write pointer offset on
* the next zone append access.
*/
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
goto unlock_wp_offset;
}
switch (op) {
case REQ_OP_ZONE_APPEND:
trace_scsi_zone_wp_update(cmd, rq->__sector,
sdkp->zones_wp_offset[zno], good_bytes);
rq->__sector += sdkp->zones_wp_offset[zno];
fallthrough;
case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE:
if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
sdkp->zones_wp_offset[zno] +=
good_bytes >> SECTOR_SHIFT;
break;
case REQ_OP_ZONE_RESET:
sdkp->zones_wp_offset[zno] = 0;
break;
case REQ_OP_ZONE_FINISH:
sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
break;
case REQ_OP_ZONE_RESET_ALL:
memset(sdkp->zones_wp_offset, 0,
sdkp->zone_info.nr_zones * sizeof(unsigned int));
break;
default:
break;
}
unlock_wp_offset:
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
return good_bytes;
}
/**
* sd_zbc_complete - ZBC command post processing.
* @cmd: Completed command
@ -619,11 +379,7 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
* so be quiet about the error.
*/
rq->rq_flags |= RQF_QUIET;
} else if (sd_zbc_need_zone_wp_update(rq))
good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
if (req_op(rq) == REQ_OP_ZONE_APPEND)
blk_req_zone_write_unlock(rq);
}
return good_bytes;
}
@ -780,46 +536,6 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp)
sdkp->zone_info.zone_blocks);
}
static int sd_zbc_init_disk(struct scsi_disk *sdkp)
{
sdkp->zones_wp_offset = NULL;
spin_lock_init(&sdkp->zones_wp_offset_lock);
sdkp->rev_wp_offset = NULL;
mutex_init(&sdkp->rev_mutex);
INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
if (!sdkp->zone_wp_update_buf)
return -ENOMEM;
return 0;
}
void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
{
if (!sdkp->zone_wp_update_buf)
return;
/* Serialize against revalidate zones */
mutex_lock(&sdkp->rev_mutex);
kvfree(sdkp->zones_wp_offset);
sdkp->zones_wp_offset = NULL;
kfree(sdkp->zone_wp_update_buf);
sdkp->zone_wp_update_buf = NULL;
sdkp->early_zone_info = (struct zoned_disk_info){ };
sdkp->zone_info = (struct zoned_disk_info){ };
mutex_unlock(&sdkp->rev_mutex);
}
static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
{
struct scsi_disk *sdkp = scsi_disk(disk);
swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
}
/*
* Call blk_revalidate_disk_zones() if any of the zoned disk properties have
* changed that make it necessary to call that function. Called by
@ -831,18 +547,8 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
struct request_queue *q = disk->queue;
u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
int ret = 0;
unsigned int flags;
/*
* For all zoned disks, initialize zone append emulation data if not
* already done.
*/
if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
ret = sd_zbc_init_disk(sdkp);
if (ret)
return ret;
}
int ret;
/*
* There is nothing to do for regular disks, including host-aware disks
@ -851,50 +557,32 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
if (!blk_queue_is_zoned(q))
return 0;
/*
* Make sure revalidate zones are serialized to ensure exclusive
* updates of the scsi disk data.
*/
mutex_lock(&sdkp->rev_mutex);
if (sdkp->zone_info.zone_blocks == zone_blocks &&
sdkp->zone_info.nr_zones == nr_zones &&
disk->nr_zones == nr_zones)
goto unlock;
return 0;
flags = memalloc_noio_save();
sdkp->zone_info.zone_blocks = zone_blocks;
sdkp->zone_info.nr_zones = nr_zones;
sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
if (!sdkp->rev_wp_offset) {
ret = -ENOMEM;
memalloc_noio_restore(flags);
goto unlock;
}
blk_queue_chunk_sectors(q,
logical_to_sectors(sdkp->device, zone_blocks));
blk_queue_max_zone_append_sectors(q,
q->limits.max_segments << PAGE_SECTORS_SHIFT);
ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
/* Enable block layer zone append emulation */
blk_queue_max_zone_append_sectors(q, 0);
flags = memalloc_noio_save();
ret = blk_revalidate_disk_zones(disk, NULL);
memalloc_noio_restore(flags);
kvfree(sdkp->rev_wp_offset);
sdkp->rev_wp_offset = NULL;
if (ret) {
sdkp->zone_info = (struct zoned_disk_info){ };
sdkp->capacity = 0;
goto unlock;
return ret;
}
sd_zbc_print_zones(sdkp);
unlock:
mutex_unlock(&sdkp->rev_mutex);
return ret;
return 0;
}
/**
@ -917,10 +605,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
if (!sd_is_zoned(sdkp)) {
/*
* Device managed or normal SCSI disk, no special handling
* required. Nevertheless, free the disk zone information in
* case the device type changed.
* required.
*/
sd_zbc_free_zone_info(sdkp);
return 0;
}
@ -941,7 +627,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
/* The drive satisfies the kernel restrictions: set it up */
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
if (sdkp->zones_max_open == U32_MAX)
disk_set_max_open_zones(disk, 0);
else