linux/drivers/target/target_core_sbc.c

1398 lines
36 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SCSI Block Commands (SBC) parsing and emulation.
*
* (c) Copyright 2002-2013 Datera, Inc.
*
* Nicholas A. Bellinger <nab@kernel.org>
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ratelimit.h>
#include <linux/crc-t10dif.h>
#include <linux/t10-pi.h>
#include <asm/unaligned.h>
#include <scsi/scsi_proto.h>
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
#include <scsi/scsi_tcq.h>
#include <target/target_core_base.h>
#include <target/target_core_backend.h>
#include <target/target_core_fabric.h>
#include "target_core_internal.h"
#include "target_core_ua.h"
#include "target_core_alua.h"
static sense_reason_t
sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool);
static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd);
static sense_reason_t
sbc_emulate_readcapacity(struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
unsigned char *cdb = cmd->t_task_cdb;
unsigned long long blocks_long = dev->transport->get_blocks(dev);
unsigned char *rbuf;
unsigned char buf[8];
u32 blocks;
/*
* SBC-2 says:
* If the PMI bit is set to zero and the LOGICAL BLOCK
* ADDRESS field is not set to zero, the device server shall
* terminate the command with CHECK CONDITION status with
* the sense key set to ILLEGAL REQUEST and the additional
* sense code set to INVALID FIELD IN CDB.
*
* In SBC-3, these fields are obsolete, but some SCSI
* compliance tests actually check this, so we might as well
* follow SBC-2.
*/
if (!(cdb[8] & 1) && !!(cdb[2] | cdb[3] | cdb[4] | cdb[5]))
return TCM_INVALID_CDB_FIELD;
if (blocks_long >= 0x00000000ffffffff)
blocks = 0xffffffff;
else
blocks = (u32)blocks_long;
put_unaligned_be32(blocks, &buf[0]);
put_unaligned_be32(dev->dev_attrib.block_size, &buf[4]);
rbuf = transport_kmap_data_sg(cmd);
if (rbuf) {
memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length));
transport_kunmap_data_sg(cmd);
}
target_complete_cmd_with_length(cmd, SAM_STAT_GOOD, 8);
return 0;
}
static sense_reason_t
sbc_emulate_readcapacity_16(struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
struct se_session *sess = cmd->se_sess;
int pi_prot_type = dev->dev_attrib.pi_prot_type;
unsigned char *rbuf;
unsigned char buf[32];
unsigned long long blocks = dev->transport->get_blocks(dev);
memset(buf, 0, sizeof(buf));
put_unaligned_be64(blocks, &buf[0]);
put_unaligned_be32(dev->dev_attrib.block_size, &buf[8]);
/*
* Set P_TYPE and PROT_EN bits for DIF support
*/
if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
/*
* Only override a device's pi_prot_type if no T10-PI is
* available, and sess_prot_type has been explicitly enabled.
*/
if (!pi_prot_type)
pi_prot_type = sess->sess_prot_type;
if (pi_prot_type)
buf[12] = (pi_prot_type - 1) << 1 | 0x1;
}
if (dev->transport->get_lbppbe)
buf[13] = dev->transport->get_lbppbe(dev) & 0x0f;
if (dev->transport->get_alignment_offset_lbas) {
u16 lalba = dev->transport->get_alignment_offset_lbas(dev);
put_unaligned_be16(lalba, &buf[14]);
}
/*
* Set Thin Provisioning Enable bit following sbc3r22 in section
* READ CAPACITY (16) byte 14 if emulate_tpu or emulate_tpws is enabled.
*/
if (dev->dev_attrib.emulate_tpu || dev->dev_attrib.emulate_tpws) {
buf[14] |= 0x80;
/*
* LBPRZ signifies that zeroes will be read back from an LBA after
* an UNMAP or WRITE SAME w/ unmap bit (sbc3r36 5.16.2)
*/
if (dev->dev_attrib.unmap_zeroes_data)
buf[14] |= 0x40;
}
rbuf = transport_kmap_data_sg(cmd);
if (rbuf) {
memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length));
transport_kunmap_data_sg(cmd);
}
target_complete_cmd_with_length(cmd, SAM_STAT_GOOD, 32);
return 0;
}
static sense_reason_t
sbc_emulate_startstop(struct se_cmd *cmd)
{
unsigned char *cdb = cmd->t_task_cdb;
/*
* See sbc3r36 section 5.25
* Immediate bit should be set since there is nothing to complete
* POWER CONDITION MODIFIER 0h
*/
if (!(cdb[1] & 1) || cdb[2] || cdb[3])
return TCM_INVALID_CDB_FIELD;
/*
* See sbc3r36 section 5.25
* POWER CONDITION 0h START_VALID - process START and LOEJ
*/
if (cdb[4] >> 4 & 0xf)
return TCM_INVALID_CDB_FIELD;
/*
* See sbc3r36 section 5.25
* LOEJ 0h - nothing to load or unload
* START 1h - we are ready
*/
if (!(cdb[4] & 1) || (cdb[4] & 2) || (cdb[4] & 4))
return TCM_INVALID_CDB_FIELD;
target_complete_cmd(cmd, SAM_STAT_GOOD);
return 0;
}
sector_t sbc_get_write_same_sectors(struct se_cmd *cmd)
{
u32 num_blocks;
if (cmd->t_task_cdb[0] == WRITE_SAME)
num_blocks = get_unaligned_be16(&cmd->t_task_cdb[7]);
else if (cmd->t_task_cdb[0] == WRITE_SAME_16)
num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]);
else /* WRITE_SAME_32 via VARIABLE_LENGTH_CMD */
num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]);
/*
* Use the explicit range when non zero is supplied, otherwise calculate
* the remaining range based on ->get_blocks() - starting LBA.
*/
if (num_blocks)
return num_blocks;
return cmd->se_dev->transport->get_blocks(cmd->se_dev) -
cmd->t_task_lba + 1;
}
EXPORT_SYMBOL(sbc_get_write_same_sectors);
static sense_reason_t
sbc_execute_write_same_unmap(struct se_cmd *cmd)
{
struct exec_cmd_ops *ops = cmd->protocol_data;
sector_t nolb = sbc_get_write_same_sectors(cmd);
sense_reason_t ret;
if (nolb) {
ret = ops->execute_unmap(cmd, cmd->t_task_lba, nolb);
if (ret)
return ret;
}
target_complete_cmd(cmd, SAM_STAT_GOOD);
return 0;
}
static sense_reason_t
sbc_emulate_noop(struct se_cmd *cmd)
{
target_complete_cmd(cmd, SAM_STAT_GOOD);
return 0;
}
static inline u32 sbc_get_size(struct se_cmd *cmd, u32 sectors)
{
return cmd->se_dev->dev_attrib.block_size * sectors;
}
static inline u32 transport_get_sectors_6(unsigned char *cdb)
{
/*
* Use 8-bit sector value. SBC-3 says:
*
* A TRANSFER LENGTH field set to zero specifies that 256
* logical blocks shall be written. Any other value
* specifies the number of logical blocks that shall be
* written.
*/
return cdb[4] ? : 256;
}
static inline u32 transport_get_sectors_10(unsigned char *cdb)
{
return get_unaligned_be16(&cdb[7]);
}
static inline u32 transport_get_sectors_12(unsigned char *cdb)
{
return get_unaligned_be32(&cdb[6]);
}
static inline u32 transport_get_sectors_16(unsigned char *cdb)
{
return get_unaligned_be32(&cdb[10]);
}
/*
* Used for VARIABLE_LENGTH_CDB WRITE_32 and READ_32 variants
*/
static inline u32 transport_get_sectors_32(unsigned char *cdb)
{
return get_unaligned_be32(&cdb[28]);
}
static inline u32 transport_lba_21(unsigned char *cdb)
{
return get_unaligned_be24(&cdb[1]) & 0x1fffff;
}
static inline u32 transport_lba_32(unsigned char *cdb)
{
return get_unaligned_be32(&cdb[2]);
}
static inline unsigned long long transport_lba_64(unsigned char *cdb)
{
return get_unaligned_be64(&cdb[2]);
}
static sense_reason_t
sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags,
struct exec_cmd_ops *ops)
{
struct se_device *dev = cmd->se_dev;
sector_t end_lba = dev->transport->get_blocks(dev) + 1;
unsigned int sectors = sbc_get_write_same_sectors(cmd);
sense_reason_t ret;
if ((flags & 0x04) || (flags & 0x02)) {
pr_err("WRITE_SAME PBDATA and LBDATA"
" bits not supported for Block Discard"
" Emulation\n");
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
if (sectors > cmd->se_dev->dev_attrib.max_write_same_len) {
pr_warn("WRITE_SAME sectors: %u exceeds max_write_same_len: %u\n",
sectors, cmd->se_dev->dev_attrib.max_write_same_len);
return TCM_INVALID_CDB_FIELD;
}
/*
* Sanity check for LBA wrap and request past end of device.
*/
if (((cmd->t_task_lba + sectors) < cmd->t_task_lba) ||
((cmd->t_task_lba + sectors) > end_lba)) {
pr_err("WRITE_SAME exceeds last lba %llu (lba %llu, sectors %u)\n",
(unsigned long long)end_lba, cmd->t_task_lba, sectors);
return TCM_ADDRESS_OUT_OF_RANGE;
}
/* We always have ANC_SUP == 0 so setting ANCHOR is always an error */
if (flags & 0x10) {
pr_warn("WRITE SAME with ANCHOR not supported\n");
return TCM_INVALID_CDB_FIELD;
}
if (flags & 0x01) {
pr_warn("WRITE SAME with NDOB not supported\n");
return TCM_INVALID_CDB_FIELD;
}
/*
* Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
* translated into block discard requests within backend code.
*/
if (flags & 0x08) {
if (!ops->execute_unmap)
return TCM_UNSUPPORTED_SCSI_OPCODE;
if (!dev->dev_attrib.emulate_tpws) {
pr_err("Got WRITE_SAME w/ UNMAP=1, but backend device"
" has emulate_tpws disabled\n");
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
cmd->execute_cmd = sbc_execute_write_same_unmap;
return 0;
}
if (!ops->execute_write_same)
return TCM_UNSUPPORTED_SCSI_OPCODE;
ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true);
if (ret)
return ret;
cmd->execute_cmd = ops->execute_write_same;
return 0;
}
static sense_reason_t
sbc_execute_rw(struct se_cmd *cmd)
{
struct exec_cmd_ops *ops = cmd->protocol_data;
return ops->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents,
cmd->data_direction);
}
static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
int *post_ret)
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
{
struct se_device *dev = cmd->se_dev;
sense_reason_t ret = TCM_NO_SENSE;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
spin_lock_irq(&cmd->t_state_lock);
if (success) {
*post_ret = 1;
if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION)
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
spin_unlock_irq(&cmd->t_state_lock);
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
/*
* Unlock ->caw_sem originally obtained during sbc_compare_and_write()
* before the original READ I/O submission.
*/
up(&dev->caw_sem);
return ret;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
}
/*
* compare @cmp_len bytes of @read_sgl with @cmp_sgl. On miscompare, fill
* @miscmp_off and return TCM_MISCOMPARE_VERIFY.
*/
static sense_reason_t
compare_and_write_do_cmp(struct scatterlist *read_sgl, unsigned int read_nents,
struct scatterlist *cmp_sgl, unsigned int cmp_nents,
unsigned int cmp_len, unsigned int *miscmp_off)
{
unsigned char *buf = NULL;
struct scatterlist *sg;
sense_reason_t ret;
unsigned int offset;
size_t rc;
int sg_cnt;
buf = kzalloc(cmp_len, GFP_KERNEL);
if (!buf) {
ret = TCM_OUT_OF_RESOURCES;
goto out;
}
rc = sg_copy_to_buffer(cmp_sgl, cmp_nents, buf, cmp_len);
if (!rc) {
pr_err("sg_copy_to_buffer() failed for compare_and_write\n");
ret = TCM_OUT_OF_RESOURCES;
goto out;
}
/*
* Compare SCSI READ payload against verify payload
*/
offset = 0;
ret = TCM_NO_SENSE;
for_each_sg(read_sgl, sg, read_nents, sg_cnt) {
unsigned int len = min(sg->length, cmp_len);
unsigned char *addr = kmap_atomic(sg_page(sg));
if (memcmp(addr, buf + offset, len)) {
unsigned int i;
for (i = 0; i < len && addr[i] == buf[offset + i]; i++)
;
*miscmp_off = offset + i;
pr_warn("Detected MISCOMPARE at offset %u\n",
*miscmp_off);
ret = TCM_MISCOMPARE_VERIFY;
}
kunmap_atomic(addr);
if (ret != TCM_NO_SENSE)
goto out;
offset += len;
cmp_len -= len;
if (!cmp_len)
break;
}
pr_debug("COMPARE AND WRITE read data matches compare data\n");
out:
kfree(buf);
return ret;
}
static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success,
int *post_ret)
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
{
struct se_device *dev = cmd->se_dev;
struct sg_table write_tbl = { };
struct scatterlist *write_sg;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
struct sg_mapping_iter m;
unsigned int len;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
unsigned int block_size = dev->dev_attrib.block_size;
unsigned int compare_len = (cmd->t_task_nolb * block_size);
unsigned int miscmp_off = 0;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
sense_reason_t ret = TCM_NO_SENSE;
int i;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
scsi: target: core: Fix hard lockup when executing a compare-and-write command While handling an I/O completion for the compare portion of a COMPARE_AND_WRITE command, it may happen that the compare_and_write_callback function submits new bio structs while still in softirq context. Low level drivers like md raid5 do not expect their make_request call to be used in softirq context, they call into schedule() and create a deadlocked system. __schedule at ffffffff873a0807 schedule at ffffffff873a0cc5 raid5_get_active_stripe at ffffffffc0875744 [raid456] raid5_make_request at ffffffffc0875a50 [raid456] md_handle_request at ffffffff8713b9f9 md_make_request at ffffffff8713bacb generic_make_request at ffffffff86e6f14b submit_bio at ffffffff86e6f27c iblock_submit_bios at ffffffffc0b4e4dc [target_core_iblock] iblock_execute_rw at ffffffffc0b4f3ce [target_core_iblock] __target_execute_cmd at ffffffffc1090079 [target_core_mod] compare_and_write_callback at ffffffffc1093602 [target_core_mod] target_cmd_interrupted at ffffffffc108d1ec [target_core_mod] target_complete_cmd_with_sense at ffffffffc108d27c [target_core_mod] iblock_complete_cmd at ffffffffc0b4e23a [target_core_iblock] dm_io_dec_pending at ffffffffc00db29e [dm_mod] clone_endio at ffffffffc00dbf07 [dm_mod] raid5_align_endio at ffffffffc086d6c2 [raid456] blk_update_request at ffffffff86e6d950 scsi_end_request at ffffffff87063d48 scsi_io_completion at ffffffff87063ee8 blk_complete_reqs at ffffffff86e77b05 __softirqentry_text_start at ffffffff876000d7 This problem appears to be an issue between target_cmd_interrupted() and compare_and_write_callback(). target_cmd_interrupted() calls the se_cmd's transport_complete_callback function pointer if the se_cmd is being stopped or aborted, and CMD_T_ABORTED was set on the se_cmd. When calling compare_and_write_callback(), the success parameter was set to false. target_cmd_interrupted() seems to expect this means the callback will do cleanup that does not require a process context. But compare_and_write_callback() ignores the parameter if there was I/O done for the compare part of COMPARE_AND_WRITE. Since there was data, the function continued on, passed the compare, and issued a write while ignoring the value of the success parameter. The submit of a bio for the write portion of the COMPARE_AND_WRITE then causes schedule to be unsafely called from the softirq context. Fix the bug in compare_and_write_callback by jumping to the out label if success == "false", after checking if we have been called by transport_generic_request_failure(); The command is being aborted or stopped so there is no need to submit the write bio for the write part of the COMPARE_AND_WRITE command. Signed-off-by: Maurizio Lombardi <mlombard@redhat.com> Link: https://lore.kernel.org/r/20221121092703.316489-1-mlombard@redhat.com Reviewed-by: Mike Christie <michael.christie@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2022-11-21 17:27:03 +08:00
if (!success) {
/*
* Handle early failure in transport_generic_request_failure(),
* which will not have taken ->caw_sem yet..
*/
if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
return TCM_NO_SENSE;
/*
* The command has been stopped or aborted so
* we don't have to perform the write operation.
*/
WARN_ON(!(cmd->transport_state &
(CMD_T_ABORTED | CMD_T_STOP)));
goto out;
}
/*
* Handle special case for zero-length COMPARE_AND_WRITE
*/
if (!cmd->data_length)
goto out;
/*
* Immediately exit + release dev->caw_sem if command has already
* been failed with a non-zero SCSI status.
*/
if (cmd->scsi_status) {
pr_debug("compare_and_write_callback: non zero scsi_status:"
" 0x%02x\n", cmd->scsi_status);
*post_ret = 1;
if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION)
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto out;
}
ret = compare_and_write_do_cmp(cmd->t_bidi_data_sg,
cmd->t_bidi_data_nents,
cmd->t_data_sg,
cmd->t_data_nents,
compare_len,
&miscmp_off);
if (ret == TCM_MISCOMPARE_VERIFY) {
/*
* SBC-4 r15: 5.3 COMPARE AND WRITE command
* In the sense data (see 4.18 and SPC-5) the offset from the
* start of the Data-Out Buffer to the first byte of data that
* was not equal shall be reported in the INFORMATION field.
*/
cmd->sense_info = miscmp_off;
goto out;
} else if (ret)
goto out;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
if (sg_alloc_table(&write_tbl, cmd->t_data_nents, GFP_KERNEL) < 0) {
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
pr_err("Unable to allocate compare_and_write sg\n");
ret = TCM_OUT_OF_RESOURCES;
goto out;
}
write_sg = write_tbl.sgl;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
i = 0;
len = compare_len;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG);
/*
* Currently assumes NoLB=1 and SGLs are PAGE_SIZE..
*/
while (len) {
sg_miter_next(&m);
if (block_size < PAGE_SIZE) {
sg_set_page(&write_sg[i], m.page, block_size,
target: fix COMPARE_AND_WRITE non zero SGL offset data corruption target_core_sbc's compare_and_write functionality suffers from taking data at the wrong memory location when writing a CAW request to disk when a SGL offset is non-zero. This can happen with loopback and vhost-scsi fabric drivers when SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC is used to map existing user-space SGL memory into COMPARE_AND_WRITE READ/WRITE payload buffers. Given the following sample LIO subtopology, % targetcli ls /loopback/ o- loopback ................................. [1 Target] o- naa.6001405ebb8df14a ....... [naa.60014059143ed2b3] o- luns ................................... [2 LUNs] o- lun0 ................ [iblock/ram0 (/dev/ram0)] o- lun1 ................ [iblock/ram1 (/dev/ram1)] % lsscsi -g [3:0:1:0] disk LIO-ORG IBLOCK 4.0 /dev/sdc /dev/sg3 [3:0:1:1] disk LIO-ORG IBLOCK 4.0 /dev/sdd /dev/sg4 the following bug can be observed in Linux 4.3 and 4.4~rc1: % perl -e 'print chr$_ for 0..255,reverse 0..255' >rand % perl -e 'print "\0" x 512' >zero % cat rand >/dev/sdd % sg_compare_and_write -i rand -D zero --lba 0 /dev/sdd % sg_compare_and_write -i zero -D rand --lba 0 /dev/sdd Miscompare reported % hexdump -Cn 512 /dev/sdd 00000000 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00000200 Rather than writing all-zeroes as instructed with the -D file, it corrupts the data in the sector by splicing some of the original bytes in. The page of the first entry of cmd->t_data_sg includes the CDB, and sg->offset is set to a position past the CDB. I presume that sg->offset is also the right choice to use for subsequent sglist members. Signed-off-by: Jan Engelhardt <jengelh@netitwork.de> Tested-by: Douglas Gilbert <dgilbert@interlog.com> Cc: <stable@vger.kernel.org> # v3.12+ Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2015-11-24 00:46:32 +08:00
m.piter.sg->offset + block_size);
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
} else {
sg_miter_next(&m);
sg_set_page(&write_sg[i], m.page, block_size,
target: fix COMPARE_AND_WRITE non zero SGL offset data corruption target_core_sbc's compare_and_write functionality suffers from taking data at the wrong memory location when writing a CAW request to disk when a SGL offset is non-zero. This can happen with loopback and vhost-scsi fabric drivers when SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC is used to map existing user-space SGL memory into COMPARE_AND_WRITE READ/WRITE payload buffers. Given the following sample LIO subtopology, % targetcli ls /loopback/ o- loopback ................................. [1 Target] o- naa.6001405ebb8df14a ....... [naa.60014059143ed2b3] o- luns ................................... [2 LUNs] o- lun0 ................ [iblock/ram0 (/dev/ram0)] o- lun1 ................ [iblock/ram1 (/dev/ram1)] % lsscsi -g [3:0:1:0] disk LIO-ORG IBLOCK 4.0 /dev/sdc /dev/sg3 [3:0:1:1] disk LIO-ORG IBLOCK 4.0 /dev/sdd /dev/sg4 the following bug can be observed in Linux 4.3 and 4.4~rc1: % perl -e 'print chr$_ for 0..255,reverse 0..255' >rand % perl -e 'print "\0" x 512' >zero % cat rand >/dev/sdd % sg_compare_and_write -i rand -D zero --lba 0 /dev/sdd % sg_compare_and_write -i zero -D rand --lba 0 /dev/sdd Miscompare reported % hexdump -Cn 512 /dev/sdd 00000000 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00000200 Rather than writing all-zeroes as instructed with the -D file, it corrupts the data in the sector by splicing some of the original bytes in. The page of the first entry of cmd->t_data_sg includes the CDB, and sg->offset is set to a position past the CDB. I presume that sg->offset is also the right choice to use for subsequent sglist members. Signed-off-by: Jan Engelhardt <jengelh@netitwork.de> Tested-by: Douglas Gilbert <dgilbert@interlog.com> Cc: <stable@vger.kernel.org> # v3.12+ Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2015-11-24 00:46:32 +08:00
m.piter.sg->offset);
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
}
len -= block_size;
i++;
}
sg_miter_stop(&m);
/*
* Save the original SGL + nents values before updating to new
* assignments, to be released in transport_free_pages() ->
* transport_reset_sgl_orig()
*/
cmd->t_data_sg_orig = cmd->t_data_sg;
cmd->t_data_sg = write_sg;
cmd->t_data_nents_orig = cmd->t_data_nents;
cmd->t_data_nents = 1;
cmd->sam_task_attr = TCM_HEAD_TAG;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
cmd->transport_complete_callback = compare_and_write_post;
/*
* Now reset ->execute_cmd() to the normal sbc_execute_rw() handler
* for submitting the adjusted SGL to write instance user-data.
*/
cmd->execute_cmd = sbc_execute_rw;
spin_lock_irq(&cmd->t_state_lock);
cmd->t_state = TRANSPORT_PROCESSING;
cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
spin_unlock_irq(&cmd->t_state_lock);
__target_execute_cmd(cmd, false);
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
return ret;
out:
/*
* In the MISCOMPARE or failure case, unlock ->caw_sem obtained in
* sbc_compare_and_write() before the original READ I/O submission.
*/
up(&dev->caw_sem);
sg_free_table(&write_tbl);
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
return ret;
}
static sense_reason_t
sbc_compare_and_write(struct se_cmd *cmd)
{
struct exec_cmd_ops *ops = cmd->protocol_data;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
struct se_device *dev = cmd->se_dev;
sense_reason_t ret;
int rc;
/*
* Submit the READ first for COMPARE_AND_WRITE to perform the
* comparision using SGLs at cmd->t_bidi_data_sg..
*/
rc = down_interruptible(&dev->caw_sem);
if (rc != 0) {
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
cmd->transport_complete_callback = NULL;
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
/*
* Reset cmd->data_length to individual block_size in order to not
* confuse backend drivers that depend on this value matching the
* size of the I/O being submitted.
*/
cmd->data_length = cmd->t_task_nolb * dev->dev_attrib.block_size;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
ret = ops->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents,
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
DMA_FROM_DEVICE);
if (ret) {
cmd->transport_complete_callback = NULL;
up(&dev->caw_sem);
return ret;
}
/*
* Unlock of dev->caw_sem to occur in compare_and_write_callback()
* upon MISCOMPARE, or in compare_and_write_done() upon completion
* of WRITE instance user-data.
*/
return TCM_NO_SENSE;
}
static int
sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_type,
bool is_write, struct se_cmd *cmd)
{
if (is_write) {
cmd->prot_op = fabric_prot ? TARGET_PROT_DOUT_STRIP :
protect ? TARGET_PROT_DOUT_PASS :
TARGET_PROT_DOUT_INSERT;
switch (protect) {
case 0x0:
case 0x3:
cmd->prot_checks = 0;
break;
case 0x1:
case 0x5:
cmd->prot_checks = TARGET_DIF_CHECK_GUARD;
if (prot_type == TARGET_DIF_TYPE1_PROT)
cmd->prot_checks |= TARGET_DIF_CHECK_REFTAG;
break;
case 0x2:
if (prot_type == TARGET_DIF_TYPE1_PROT)
cmd->prot_checks = TARGET_DIF_CHECK_REFTAG;
break;
case 0x4:
cmd->prot_checks = TARGET_DIF_CHECK_GUARD;
break;
default:
pr_err("Unsupported protect field %d\n", protect);
return -EINVAL;
}
} else {
cmd->prot_op = fabric_prot ? TARGET_PROT_DIN_INSERT :
protect ? TARGET_PROT_DIN_PASS :
TARGET_PROT_DIN_STRIP;
switch (protect) {
case 0x0:
case 0x1:
case 0x5:
cmd->prot_checks = TARGET_DIF_CHECK_GUARD;
if (prot_type == TARGET_DIF_TYPE1_PROT)
cmd->prot_checks |= TARGET_DIF_CHECK_REFTAG;
break;
case 0x2:
if (prot_type == TARGET_DIF_TYPE1_PROT)
cmd->prot_checks = TARGET_DIF_CHECK_REFTAG;
break;
case 0x3:
cmd->prot_checks = 0;
break;
case 0x4:
cmd->prot_checks = TARGET_DIF_CHECK_GUARD;
break;
default:
pr_err("Unsupported protect field %d\n", protect);
return -EINVAL;
}
}
return 0;
}
static sense_reason_t
sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect,
u32 sectors, bool is_write)
{
int sp_ops = cmd->se_sess->sup_prot_ops;
int pi_prot_type = dev->dev_attrib.pi_prot_type;
bool fabric_prot = false;
if (!cmd->t_prot_sg || !cmd->t_prot_nents) {
if (unlikely(protect &&
!dev->dev_attrib.pi_prot_type && !cmd->se_sess->sess_prot_type)) {
pr_err("CDB contains protect bit, but device + fabric does"
" not advertise PROTECT=1 feature bit\n");
return TCM_INVALID_CDB_FIELD;
}
if (cmd->prot_pto)
return TCM_NO_SENSE;
}
switch (dev->dev_attrib.pi_prot_type) {
case TARGET_DIF_TYPE3_PROT:
cmd->reftag_seed = 0xffffffff;
break;
case TARGET_DIF_TYPE2_PROT:
if (protect)
return TCM_INVALID_CDB_FIELD;
cmd->reftag_seed = cmd->t_task_lba;
break;
case TARGET_DIF_TYPE1_PROT:
cmd->reftag_seed = cmd->t_task_lba;
break;
case TARGET_DIF_TYPE0_PROT:
/*
* See if the fabric supports T10-PI, and the session has been
* configured to allow export PROTECT=1 feature bit with backend
* devices that don't support T10-PI.
*/
fabric_prot = is_write ?
!!(sp_ops & (TARGET_PROT_DOUT_PASS | TARGET_PROT_DOUT_STRIP)) :
!!(sp_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DIN_INSERT));
if (fabric_prot && cmd->se_sess->sess_prot_type) {
pi_prot_type = cmd->se_sess->sess_prot_type;
break;
}
if (!protect)
return TCM_NO_SENSE;
fallthrough;
default:
pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
"PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect);
return TCM_INVALID_CDB_FIELD;
}
if (sbc_set_prot_op_checks(protect, fabric_prot, pi_prot_type, is_write, cmd))
return TCM_INVALID_CDB_FIELD;
cmd->prot_type = pi_prot_type;
cmd->prot_length = dev->prot_length * sectors;
/**
* In case protection information exists over the wire
* we modify command data length to describe pure data.
* The actual transfer length is data length + protection
* length
**/
if (protect)
cmd->data_length = sectors * dev->dev_attrib.block_size;
pr_debug("%s: prot_type=%d, data_length=%d, prot_length=%d "
"prot_op=%d prot_checks=%d\n",
__func__, cmd->prot_type, cmd->data_length, cmd->prot_length,
cmd->prot_op, cmd->prot_checks);
return TCM_NO_SENSE;
}
static int
sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb)
{
if (cdb[1] & 0x10) {
/* see explanation in spc_emulate_modesense */
if (!target_check_fua(dev)) {
pr_err("Got CDB: 0x%02x with DPO bit set, but device"
" does not advertise support for DPO\n", cdb[0]);
return -EINVAL;
}
}
if (cdb[1] & 0x8) {
if (!target_check_fua(dev)) {
pr_err("Got CDB: 0x%02x with FUA bit set, but device"
" does not advertise support for FUA write\n",
cdb[0]);
return -EINVAL;
}
cmd->se_cmd_flags |= SCF_FUA;
}
return 0;
}
sense_reason_t
sbc_parse_cdb(struct se_cmd *cmd, struct exec_cmd_ops *ops)
{
struct se_device *dev = cmd->se_dev;
unsigned char *cdb = cmd->t_task_cdb;
unsigned int size;
u32 sectors = 0;
sense_reason_t ret;
cmd->protocol_data = ops;
switch (cdb[0]) {
case READ_6:
sectors = transport_get_sectors_6(cdb);
cmd->t_task_lba = transport_lba_21(cdb);
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case READ_10:
sectors = transport_get_sectors_10(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case READ_12:
sectors = transport_get_sectors_12(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case READ_16:
sectors = transport_get_sectors_16(cdb);
cmd->t_task_lba = transport_lba_64(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case WRITE_6:
sectors = transport_get_sectors_6(cdb);
cmd->t_task_lba = transport_lba_21(cdb);
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case WRITE_10:
case WRITE_VERIFY:
sectors = transport_get_sectors_10(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case WRITE_12:
sectors = transport_get_sectors_12(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case WRITE_16:
case WRITE_VERIFY_16:
sectors = transport_get_sectors_16(cdb);
cmd->t_task_lba = transport_lba_64(cdb);
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
cmd->execute_cmd = sbc_execute_rw;
break;
case VARIABLE_LENGTH_CMD:
{
u16 service_action = get_unaligned_be16(&cdb[8]);
switch (service_action) {
case WRITE_SAME_32:
sectors = transport_get_sectors_32(cdb);
if (!sectors) {
pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not"
" supported\n");
return TCM_INVALID_CDB_FIELD;
}
size = sbc_get_size(cmd, 1);
cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
ret = sbc_setup_write_same(cmd, cdb[10], ops);
if (ret)
return ret;
break;
default:
pr_err("VARIABLE_LENGTH_CMD service action"
" 0x%04x not supported\n", service_action);
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
break;
}
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
case COMPARE_AND_WRITE:
if (!dev->dev_attrib.emulate_caw) {
pr_err_ratelimited("se_device %s/%s (vpd_unit_serial %s) reject COMPARE_AND_WRITE\n",
dev->se_hba->backend->ops->name,
config_item_name(&dev->dev_group.cg_item),
dev->t10_wwn.unit_serial);
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
sectors = cdb[13];
/*
* Currently enforce COMPARE_AND_WRITE for a single sector
*/
if (sectors > 1) {
pr_err("COMPARE_AND_WRITE contains NoLB: %u greater"
" than 1\n", sectors);
return TCM_INVALID_CDB_FIELD;
}
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
/*
* Double size because we have two buffers, note that
* zero is not an error..
*/
size = 2 * sbc_get_size(cmd, sectors);
cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
cmd->t_task_nolb = sectors;
cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE;
cmd->execute_cmd = sbc_compare_and_write;
cmd->transport_complete_callback = compare_and_write_callback;
break;
case READ_CAPACITY:
size = READ_CAP_LEN;
cmd->execute_cmd = sbc_emulate_readcapacity;
break;
case SERVICE_ACTION_IN_16:
switch (cmd->t_task_cdb[1] & 0x1f) {
case SAI_READ_CAPACITY_16:
cmd->execute_cmd = sbc_emulate_readcapacity_16;
break;
case SAI_REPORT_REFERRALS:
cmd->execute_cmd = target_emulate_report_referrals;
break;
default:
pr_err("Unsupported SA: 0x%02x\n",
cmd->t_task_cdb[1] & 0x1f);
return TCM_INVALID_CDB_FIELD;
}
size = get_unaligned_be32(&cdb[10]);
break;
case SYNCHRONIZE_CACHE:
case SYNCHRONIZE_CACHE_16:
if (cdb[0] == SYNCHRONIZE_CACHE) {
sectors = transport_get_sectors_10(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
} else {
sectors = transport_get_sectors_16(cdb);
cmd->t_task_lba = transport_lba_64(cdb);
}
if (ops->execute_sync_cache) {
cmd->execute_cmd = ops->execute_sync_cache;
goto check_lba;
}
size = 0;
cmd->execute_cmd = sbc_emulate_noop;
break;
case UNMAP:
if (!ops->execute_unmap)
return TCM_UNSUPPORTED_SCSI_OPCODE;
if (!dev->dev_attrib.emulate_tpu) {
pr_err("Got UNMAP, but backend device has"
" emulate_tpu disabled\n");
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
size = get_unaligned_be16(&cdb[7]);
cmd->execute_cmd = sbc_execute_unmap;
break;
case WRITE_SAME_16:
sectors = transport_get_sectors_16(cdb);
if (!sectors) {
pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
return TCM_INVALID_CDB_FIELD;
}
size = sbc_get_size(cmd, 1);
cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
ret = sbc_setup_write_same(cmd, cdb[1], ops);
if (ret)
return ret;
break;
case WRITE_SAME:
sectors = transport_get_sectors_10(cdb);
if (!sectors) {
pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
return TCM_INVALID_CDB_FIELD;
}
size = sbc_get_size(cmd, 1);
cmd->t_task_lba = get_unaligned_be32(&cdb[2]);
/*
* Follow sbcr26 with WRITE_SAME (10) and check for the existence
* of byte 1 bit 3 UNMAP instead of original reserved field
*/
ret = sbc_setup_write_same(cmd, cdb[1], ops);
if (ret)
return ret;
break;
case VERIFY:
case VERIFY_16:
size = 0;
if (cdb[0] == VERIFY) {
sectors = transport_get_sectors_10(cdb);
cmd->t_task_lba = transport_lba_32(cdb);
} else {
sectors = transport_get_sectors_16(cdb);
cmd->t_task_lba = transport_lba_64(cdb);
}
cmd->execute_cmd = sbc_emulate_noop;
goto check_lba;
case REZERO_UNIT:
case SEEK_6:
case SEEK_10:
/*
* There are still clients out there which use these old SCSI-2
* commands. This mainly happens when running VMs with legacy
* guest systems, connected via SCSI command pass-through to
* iSCSI targets. Make them happy and return status GOOD.
*/
size = 0;
cmd->execute_cmd = sbc_emulate_noop;
break;
case START_STOP:
size = 0;
cmd->execute_cmd = sbc_emulate_startstop;
break;
default:
ret = spc_parse_cdb(cmd, &size);
if (ret)
return ret;
}
/* reject any command that we don't have a handler for */
if (!cmd->execute_cmd)
return TCM_UNSUPPORTED_SCSI_OPCODE;
if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
unsigned long long end_lba;
check_lba:
end_lba = dev->transport->get_blocks(dev) + 1;
if (((cmd->t_task_lba + sectors) < cmd->t_task_lba) ||
((cmd->t_task_lba + sectors) > end_lba)) {
pr_err("cmd exceeds last lba %llu "
"(lba %llu, sectors %u)\n",
end_lba, cmd->t_task_lba, sectors);
return TCM_ADDRESS_OUT_OF_RANGE;
}
target: Add support for COMPARE_AND_WRITE emulation This patch adds support for COMPARE_AND_WRITE emulation on a per block basis. This logic is used as an atomic test and set primative currently used by VMWare ESX VAAI for performing array side locking of individual VMFS extent ownership. This includes the COMPARE_AND_WRITE CDB parsing within sbc_parse_cdb(), and does the majority of the work within the compare_and_write_callback() to perform the verify instance user data comparision, and subsequent write instance user data I/O submission upon a successfull comparision. The synchronization is enforced by se_device->caw_sem, that is obtained before the initial READ I/O submission in sbc_compare_and_write(). The mutex is then released upon MISCOMPARE in compare_and_write_callback(), or upon WRITE instance user-data completion in compare_and_write_post(). The implementation currently assumes a single logical block (NoLB=1). v4 changes: - Explicitly clear cmd->transport_complete_callback for two failure cases in sbc_compare_and_write() in order to avoid double unlock of ->caw_sem in compare_and_write_callback() (Dan Carpenter) v3 changes: - Convert se_device->caw_mutex to ->caw_sem v2 changes: - Set SCF_COMPARE_AND_WRITE and cmd->execute_cmd() to sbc_compare_and_write() during setup in sbc_parse_cdb() - Use sbc_compare_and_write() for initial READ submission with DMA_FROM_DEVICE - Reset cmd->execute_cmd() to sbc_execute_rw() for write instance user-data in compare_and_write_callback() - Drop SCF_BIDI command flag usage - Set TRANSPORT_PROCESSING + transport_state flags before write instance submission, and convert to __target_execute_cmd() - Prevent sbc_get_size() from being being called twice to generate incorrect size in sbc_parse_cdb() - Enforce se_device->caw_mutex synchronization between initial READ I/O submission, and final WRITE I/O completion. Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Cc: Martin Petersen <martin.petersen@oracle.com> Cc: Chris Mason <chris.mason@fusionio.com> Cc: James Bottomley <JBottomley@Parallels.com> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
2013-08-20 06:20:28 +08:00
if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE))
size = sbc_get_size(cmd, sectors);
}
return target_cmd_size_check(cmd, size);
}
EXPORT_SYMBOL(sbc_parse_cdb);
u32 sbc_get_device_type(struct se_device *dev)
{
return TYPE_DISK;
}
EXPORT_SYMBOL(sbc_get_device_type);
static sense_reason_t
sbc_execute_unmap(struct se_cmd *cmd)
{
struct exec_cmd_ops *ops = cmd->protocol_data;
struct se_device *dev = cmd->se_dev;
unsigned char *buf, *ptr = NULL;
sector_t lba;
int size;
u32 range;
sense_reason_t ret = 0;
int dl, bd_dl;
/* We never set ANC_SUP */
if (cmd->t_task_cdb[1])
return TCM_INVALID_CDB_FIELD;
if (cmd->data_length == 0) {
target_complete_cmd(cmd, SAM_STAT_GOOD);
return 0;
}
if (cmd->data_length < 8) {
pr_warn("UNMAP parameter list length %u too small\n",
cmd->data_length);
return TCM_PARAMETER_LIST_LENGTH_ERROR;
}
buf = transport_kmap_data_sg(cmd);
if (!buf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
dl = get_unaligned_be16(&buf[0]);
bd_dl = get_unaligned_be16(&buf[2]);
size = cmd->data_length - 8;
if (bd_dl > size)
pr_warn("UNMAP parameter list length %u too small, ignoring bd_dl %u\n",
cmd->data_length, bd_dl);
else
size = bd_dl;
if (size / 16 > dev->dev_attrib.max_unmap_block_desc_count) {
ret = TCM_INVALID_PARAMETER_LIST;
goto err;
}
/* First UNMAP block descriptor starts at 8 byte offset */
ptr = &buf[8];
pr_debug("UNMAP: Sub: %s Using dl: %u bd_dl: %u size: %u"
" ptr: %p\n", dev->transport->name, dl, bd_dl, size, ptr);
while (size >= 16) {
lba = get_unaligned_be64(&ptr[0]);
range = get_unaligned_be32(&ptr[8]);
pr_debug("UNMAP: Using lba: %llu and range: %u\n",
(unsigned long long)lba, range);
if (range > dev->dev_attrib.max_unmap_lba_count) {
ret = TCM_INVALID_PARAMETER_LIST;
goto err;
}
if (lba + range > dev->transport->get_blocks(dev) + 1) {
ret = TCM_ADDRESS_OUT_OF_RANGE;
goto err;
}
if (range) {
ret = ops->execute_unmap(cmd, lba, range);
if (ret)
goto err;
}
ptr += 16;
size -= 16;
}
err:
transport_kunmap_data_sg(cmd);
if (!ret)
target_complete_cmd(cmd, SAM_STAT_GOOD);
return ret;
}
void
sbc_dif_generate(struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
struct t10_pi_tuple *sdt;
struct scatterlist *dsg = cmd->t_data_sg, *psg;
sector_t sector = cmd->t_task_lba;
void *daddr, *paddr;
int i, j, offset = 0;
unsigned int block_size = dev->dev_attrib.block_size;
for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
paddr = kmap_atomic(sg_page(psg)) + psg->offset;
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
for (j = 0; j < psg->length;
j += sizeof(*sdt)) {
__u16 crc;
unsigned int avail;
if (offset >= dsg->length) {
offset -= dsg->length;
kunmap_atomic(daddr - dsg->offset);
dsg = sg_next(dsg);
if (!dsg) {
kunmap_atomic(paddr - psg->offset);
return;
}
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
}
sdt = paddr + j;
avail = min(block_size, dsg->length - offset);
crc = crc_t10dif(daddr + offset, avail);
if (avail < block_size) {
kunmap_atomic(daddr - dsg->offset);
dsg = sg_next(dsg);
if (!dsg) {
kunmap_atomic(paddr - psg->offset);
return;
}
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
offset = block_size - avail;
crc = crc_t10dif_update(crc, daddr, offset);
} else {
offset += block_size;
}
sdt->guard_tag = cpu_to_be16(crc);
if (cmd->prot_type == TARGET_DIF_TYPE1_PROT)
sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
sdt->app_tag = 0;
pr_debug("DIF %s INSERT sector: %llu guard_tag: 0x%04x"
" app_tag: 0x%04x ref_tag: %u\n",
(cmd->data_direction == DMA_TO_DEVICE) ?
"WRITE" : "READ", (unsigned long long)sector,
sdt->guard_tag, sdt->app_tag,
be32_to_cpu(sdt->ref_tag));
sector++;
}
kunmap_atomic(daddr - dsg->offset);
kunmap_atomic(paddr - psg->offset);
}
}
static sense_reason_t
sbc_dif_v1_verify(struct se_cmd *cmd, struct t10_pi_tuple *sdt,
__u16 crc, sector_t sector, unsigned int ei_lba)
{
__be16 csum;
if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
goto check_ref;
csum = cpu_to_be16(crc);
if (sdt->guard_tag != csum) {
pr_err("DIFv1 checksum failed on sector %llu guard tag 0x%04x"
" csum 0x%04x\n", (unsigned long long)sector,
be16_to_cpu(sdt->guard_tag), be16_to_cpu(csum));
return TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
}
check_ref:
if (!(cmd->prot_checks & TARGET_DIF_CHECK_REFTAG))
return 0;
if (cmd->prot_type == TARGET_DIF_TYPE1_PROT &&
be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
pr_err("DIFv1 Type 1 reference failed on sector: %llu tag: 0x%08x"
" sector MSB: 0x%08x\n", (unsigned long long)sector,
be32_to_cpu(sdt->ref_tag), (u32)(sector & 0xffffffff));
return TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
}
if (cmd->prot_type == TARGET_DIF_TYPE2_PROT &&
be32_to_cpu(sdt->ref_tag) != ei_lba) {
pr_err("DIFv1 Type 2 reference failed on sector: %llu tag: 0x%08x"
" ei_lba: 0x%08x\n", (unsigned long long)sector,
be32_to_cpu(sdt->ref_tag), ei_lba);
return TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
}
return 0;
}
void sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read,
struct scatterlist *sg, int sg_off)
{
struct se_device *dev = cmd->se_dev;
struct scatterlist *psg;
void *paddr, *addr;
unsigned int i, len, left;
unsigned int offset = sg_off;
if (!sg)
return;
left = sectors * dev->prot_length;
for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
unsigned int psg_len, copied = 0;
paddr = kmap_atomic(sg_page(psg)) + psg->offset;
psg_len = min(left, psg->length);
while (psg_len) {
len = min(psg_len, sg->length - offset);
addr = kmap_atomic(sg_page(sg)) + sg->offset + offset;
if (read)
memcpy(paddr + copied, addr, len);
else
memcpy(addr, paddr + copied, len);
left -= len;
offset += len;
copied += len;
psg_len -= len;
kunmap_atomic(addr - sg->offset - offset);
if (offset >= sg->length) {
sg = sg_next(sg);
offset = 0;
}
}
kunmap_atomic(paddr - psg->offset);
}
}
EXPORT_SYMBOL(sbc_dif_copy_prot);
sense_reason_t
sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
unsigned int ei_lba, struct scatterlist *psg, int psg_off)
{
struct se_device *dev = cmd->se_dev;
struct t10_pi_tuple *sdt;
struct scatterlist *dsg = cmd->t_data_sg;
sector_t sector = start;
void *daddr, *paddr;
int i;
sense_reason_t rc;
int dsg_off = 0;
unsigned int block_size = dev->dev_attrib.block_size;
for (; psg && sector < start + sectors; psg = sg_next(psg)) {
paddr = kmap_atomic(sg_page(psg)) + psg->offset;
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
for (i = psg_off; i < psg->length &&
sector < start + sectors;
i += sizeof(*sdt)) {
__u16 crc;
unsigned int avail;
if (dsg_off >= dsg->length) {
dsg_off -= dsg->length;
kunmap_atomic(daddr - dsg->offset);
dsg = sg_next(dsg);
if (!dsg) {
kunmap_atomic(paddr - psg->offset);
return 0;
}
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
}
sdt = paddr + i;
pr_debug("DIF READ sector: %llu guard_tag: 0x%04x"
" app_tag: 0x%04x ref_tag: %u\n",
(unsigned long long)sector, sdt->guard_tag,
sdt->app_tag, be32_to_cpu(sdt->ref_tag));
if (sdt->app_tag == T10_PI_APP_ESCAPE) {
dsg_off += block_size;
goto next;
}
avail = min(block_size, dsg->length - dsg_off);
crc = crc_t10dif(daddr + dsg_off, avail);
if (avail < block_size) {
kunmap_atomic(daddr - dsg->offset);
dsg = sg_next(dsg);
if (!dsg) {
kunmap_atomic(paddr - psg->offset);
return 0;
}
daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
dsg_off = block_size - avail;
crc = crc_t10dif_update(crc, daddr, dsg_off);
} else {
dsg_off += block_size;
}
rc = sbc_dif_v1_verify(cmd, sdt, crc, sector, ei_lba);
if (rc) {
kunmap_atomic(daddr - dsg->offset);
kunmap_atomic(paddr - psg->offset);
cmd->sense_info = sector;
return rc;
}
next:
sector++;
ei_lba++;
}
psg_off = 0;
kunmap_atomic(daddr - dsg->offset);
kunmap_atomic(paddr - psg->offset);
}
return 0;
}
EXPORT_SYMBOL(sbc_dif_verify);