qemu/hw/nvme/dif.c
Klaus Jensen 88eea45c53 hw/nvme: move nvme emulation out of hw/block
With the introduction of the nvme-subsystem device we are really
cluttering up the hw/block directory.

As suggested by Philippe previously, move the nvme emulation to hw/nvme.

Suggested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
2021-05-17 09:19:00 +02:00

510 lines
14 KiB
C

/*
* QEMU NVM Express End-to-End Data Protection support
*
* Copyright (c) 2021 Samsung Electronics Co., Ltd.
*
* Authors:
* Klaus Jensen <k.jensen@samsung.com>
* Gollu Appalanaidu <anaidu.gollu@samsung.com>
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"
#include "nvme.h"
#include "trace.h"
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
uint32_t reftag)
{
if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
(ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
return NVME_INVALID_PROT_INFO | NVME_DNR;
}
return NVME_SUCCESS;
}
/* from Linux kernel (crypto/crct10dif_common.c) */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
size_t len)
{
unsigned int i;
for (i = 0; i < len; i++) {
crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
}
return crc;
}
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
uint8_t *mbuf, size_t mlen, uint16_t apptag,
uint32_t reftag)
{
uint8_t *end = buf + len;
int16_t pil = 0;
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
}
trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
apptag, reftag);
for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
if (pil) {
crc = crc_t10dif(crc, mbuf, pil);
}
dif->guard = cpu_to_be16(crc);
dif->apptag = cpu_to_be16(apptag);
dif->reftag = cpu_to_be32(reftag);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
reftag++;
}
}
}
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
uint8_t *buf, uint8_t *mbuf, size_t pil,
uint16_t ctrl, uint16_t apptag,
uint16_t appmask, uint32_t reftag)
{
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
case NVME_ID_NS_DPS_TYPE_3:
if (be32_to_cpu(dif->reftag) != 0xffffffff) {
break;
}
/* fallthrough */
case NVME_ID_NS_DPS_TYPE_1:
case NVME_ID_NS_DPS_TYPE_2:
if (be16_to_cpu(dif->apptag) != 0xffff) {
break;
}
trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
be32_to_cpu(dif->reftag));
return NVME_SUCCESS;
}
if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
if (pil) {
crc = crc_t10dif(crc, mbuf, pil);
}
trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);
if (be16_to_cpu(dif->guard) != crc) {
return NVME_E2E_GUARD_ERROR;
}
}
if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
appmask);
if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
return NVME_E2E_APP_ERROR;
}
}
if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
if (be32_to_cpu(dif->reftag) != reftag) {
return NVME_E2E_REF_ERROR;
}
}
return NVME_SUCCESS;
}
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
uint8_t *mbuf, size_t mlen, uint16_t ctrl,
uint64_t slba, uint16_t apptag,
uint16_t appmask, uint32_t reftag)
{
uint8_t *end = buf + len;
int16_t pil = 0;
uint16_t status;
status = nvme_check_prinfo(ns, ctrl, slba, reftag);
if (status) {
return status;
}
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
}
trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil);
for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
appmask, reftag);
if (status) {
return status;
}
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
reftag++;
}
}
return NVME_SUCCESS;
}
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
uint64_t slba)
{
BlockBackend *blk = ns->blkconf.blk;
BlockDriverState *bs = blk_bs(blk);
int64_t moffset = 0, offset = nvme_l2b(ns, slba);
uint8_t *mbufp, *end;
bool zeroed;
int16_t pil = 0;
int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
int64_t pnum = 0;
Error *err = NULL;
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
}
do {
int ret;
bytes -= pnum;
ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
if (ret < 0) {
error_setg_errno(&err, -ret, "unable to get block status");
error_report_err(err);
return NVME_INTERNAL_DEV_ERROR;
}
zeroed = !!(ret & BDRV_BLOCK_ZERO);
trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);
if (zeroed) {
mbufp = mbuf + moffset;
mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
end = mbufp + mlen;
for (; mbufp < end; mbufp += ns->lbaf.ms) {
memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
}
}
moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
offset += pnum;
} while (pnum != bytes);
return NVME_SUCCESS;
}
static void nvme_dif_rw_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
NvmeRequest *req = ctx->req;
NvmeNamespace *ns = req->ns;
BlockBackend *blk = ns->blkconf.blk;
trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));
qemu_iovec_destroy(&ctx->data.iov);
g_free(ctx->data.bounce);
qemu_iovec_destroy(&ctx->mdata.iov);
g_free(ctx->mdata.bounce);
g_free(ctx);
nvme_rw_complete_cb(req, ret);
}
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
NvmeRequest *req = ctx->req;
NvmeNamespace *ns = req->ns;
NvmeCtrl *n = nvme_ctrl(req);
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
uint64_t slba = le64_to_cpu(rw->slba);
uint16_t ctrl = le16_to_cpu(rw->control);
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
uint16_t status;
trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
appmask, reftag);
if (ret) {
goto out;
}
status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
slba);
if (status) {
req->status = status;
goto out;
}
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
slba, apptag, appmask, reftag);
if (status) {
req->status = status;
goto out;
}
status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
NVME_TX_DIRECTION_FROM_DEVICE, req);
if (status) {
req->status = status;
goto out;
}
if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) {
goto out;
}
status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
NVME_TX_DIRECTION_FROM_DEVICE, req);
if (status) {
req->status = status;
}
out:
nvme_dif_rw_cb(ctx, ret);
}
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
NvmeRequest *req = ctx->req;
NvmeNamespace *ns = req->ns;
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
size_t mlen = nvme_m2b(ns, nlb);
uint64_t offset = nvme_moff(ns, slba);
BlockBackend *blk = ns->blkconf.blk;
trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
if (ret) {
goto out;
}
ctx->mdata.bounce = g_malloc(mlen);
qemu_iovec_reset(&ctx->mdata.iov);
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
nvme_dif_rw_check_cb, ctx);
return;
out:
nvme_dif_rw_cb(ctx, ret);
}
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
NvmeBounceContext *ctx = opaque;
NvmeRequest *req = ctx->req;
NvmeNamespace *ns = req->ns;
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
uint64_t slba = le64_to_cpu(rw->slba);
uint64_t offset = nvme_moff(ns, slba);
BlockBackend *blk = ns->blkconf.blk;
trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
if (ret) {
goto out;
}
req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
nvme_dif_rw_cb, ctx);
return;
out:
nvme_dif_rw_cb(ctx, ret);
}
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
BlockBackend *blk = ns->blkconf.blk;
bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
uint64_t slba = le64_to_cpu(rw->slba);
size_t len = nvme_l2b(ns, nlb);
size_t mlen = nvme_m2b(ns, nlb);
size_t mapped_len = len;
int64_t offset = nvme_l2b(ns, slba);
uint16_t ctrl = le16_to_cpu(rw->control);
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
uint32_t reftag = le32_to_cpu(rw->reftag);
bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT);
NvmeBounceContext *ctx;
uint16_t status;
trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl));
ctx = g_new0(NvmeBounceContext, 1);
ctx->req = req;
if (wrz) {
BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;
if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
status = NVME_INVALID_PROT_INFO | NVME_DNR;
goto err;
}
if (pract) {
uint8_t *mbuf, *end;
int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
status = nvme_check_prinfo(ns, ctrl, slba, reftag);
if (status) {
goto err;
}
flags = 0;
ctx->mdata.bounce = g_malloc0(mlen);
qemu_iovec_init(&ctx->mdata.iov, 1);
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
mbuf = ctx->mdata.bounce;
end = mbuf + mlen;
if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
pil = 0;
}
for (; mbuf < end; mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
dif->apptag = cpu_to_be16(apptag);
dif->reftag = cpu_to_be32(reftag);
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
case NVME_ID_NS_DPS_TYPE_1:
case NVME_ID_NS_DPS_TYPE_2:
reftag++;
}
}
}
req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
nvme_dif_rw_mdata_out_cb, ctx);
return NVME_NO_COMPLETE;
}
if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
mapped_len += mlen;
}
status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
if (status) {
goto err;
}
ctx->data.bounce = g_malloc(len);
qemu_iovec_init(&ctx->data.iov, 1);
qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
if (req->cmd.opcode == NVME_CMD_READ) {
block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
BLOCK_ACCT_READ);
req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
nvme_dif_rw_mdata_in_cb, ctx);
return NVME_NO_COMPLETE;
}
status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
NVME_TX_DIRECTION_TO_DEVICE, req);
if (status) {
goto err;
}
ctx->mdata.bounce = g_malloc(mlen);
qemu_iovec_init(&ctx->mdata.iov, 1);
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
if (!(pract && ns->lbaf.ms == 8)) {
status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
NVME_TX_DIRECTION_TO_DEVICE, req);
if (status) {
goto err;
}
}
status = nvme_check_prinfo(ns, ctrl, slba, reftag);
if (status) {
goto err;
}
if (pract) {
/* splice generated protection information into the buffer */
nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
ctx->mdata.bounce, ctx->mdata.iov.size,
apptag, reftag);
} else {
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
slba, apptag, appmask, reftag);
if (status) {
goto err;
}
}
block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
nvme_dif_rw_mdata_out_cb, ctx);
return NVME_NO_COMPLETE;
err:
qemu_iovec_destroy(&ctx->data.iov);
g_free(ctx->data.bounce);
qemu_iovec_destroy(&ctx->mdata.iov);
g_free(ctx->mdata.bounce);
g_free(ctx);
return status;
}