for-5.15/drivers-2021-08-30
Merge tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Sitting on top of the core block changes, here are the driver changes
  for the 5.15 merge window:

   - NVMe updates via Christoph:
       - suspend improvements for devices with an HMB (Keith Busch)
       - handle double completions more gracefully (Sagi Grimberg)
       - cleanup the selects for the nvme core code a bit (Sagi Grimberg)
       - don't update queue count when failing to set io queues (Ruozhu Li)
       - various nvmet connect fixes (Amit Engel)
       - cleanup lightnvm leftovers (Keith Busch, me)
       - small cleanups (Colin Ian King, Hou Pu)
       - add tracing for the Set Features command (Hou Pu)
       - CMB sysfs cleanups (Keith Busch)
       - add a mutex_destroy call (Keith Busch)

   - remove the lightnvm subsystem. It has served its purpose and
     ultimately led to zoned NVMe support; we no longer need it
     (Christoph)

   - revert the floppy O_NDELAY fix (Denis)

   - nbd fixes (Hou, Pavel, Baokun)

   - nbd locking fixes (Tetsuo)

   - nbd device removal fixes (Christoph)

   - raid10 rcu warning fix (Xiao)

   - raid1 write behind fix (Guoqing)

   - rnbd fixes (Gioh, Md Haris)

   - misc fixes (Colin)"

* tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block: (42 commits)
  Revert "floppy: reintroduce O_NDELAY fix"
  raid1: ensure write behind bio has less than BIO_MAX_VECS sectors
  md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
  nbd: remove nbd->destroy_complete
  nbd: only return usable devices from nbd_find_unused
  nbd: set nbd->index before releasing nbd_index_mutex
  nbd: prevent IDR lookups from finding partially initialized devices
  nbd: reset NBD to NULL when restarting in nbd_genl_connect
  nbd: add missing locking to the nbd_dev_add error path
  nvme: remove the unused NVME_NS_* enum
  nvme: remove nvm_ndev from ns
  nvme: Have NVME_FABRICS select NVME_CORE instead of transport drivers
  block: nbd: add sanity check for first_minor
  nvmet: check that host sqsize does not exceed ctrl MQES
  nvmet: avoid duplicate qid in connect cmd
  nvmet: pass back cntlid on successful completion
  nvme-rdma: don't update queue count when failing to set io queues
  nvme-tcp: don't update queue count when failing to set io queues
  nvme-tcp: pair send_mutex init with destroy
  nvme: allow user toggling hmb usage
  ...
commit 9a1d6c9e3f
@@ -85,7 +85,6 @@ available subsections can be seen below.
   io-mapping
   io_ordering
   generic-counter
   lightnvm-pblk
   memory-devices/index
   men-chameleon-bus
   ntb
@@ -1,21 +0,0 @@
pblk: Physical Block Device Target
==================================

pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:

  - Map logical addresses onto physical addresses (4KB granularity) in a
    logical-to-physical (L2P) table.
  - Maintain the integrity and consistency of the L2P table as well as its
    recovery from normal tear down and power outage.
  - Deal with controller- and media-specific constrains.
  - Handle I/O errors.
  - Implement garbage collection.
  - Maintain consistency across the I/O stack during synchronization points.

For more information please refer to:

  http://lightnvm.io

which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.
@@ -160,7 +160,6 @@ Code  Seq#    Include File                                         Comments
'K'   all    linux/kd.h
'L'   00-1F  linux/loop.h                                           conflict!
'L'   10-1F  drivers/scsi/mpt3sas/mpt3sas_ctl.h                     conflict!
'L'   20-2F  linux/lightnvm.h
'L'   E0-FF  linux/ppdd.h                                           encrypted disk device driver
                                                                    <http://linux01.gwdg.de/~alatham/ppdd.html>
'M'   all    linux/soundcard.h                                      conflict!
@@ -10619,15 +10619,6 @@ F: LICENSES/
F:	scripts/spdxcheck-test.sh
F:	scripts/spdxcheck.py

LIGHTNVM PLATFORM SUPPORT
M:	Matias Bjorling <mb@lightnvm.io>
L:	linux-block@vger.kernel.org
S:	Maintained
W:	http://github/OpenChannelSSD
F:	drivers/lightnvm/
F:	include/linux/lightnvm.h
F:	include/uapi/linux/lightnvm.h

LINEAR RANGES HELPERS
M:	Mark Brown <broonie@kernel.org>
R:	Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"

source "drivers/isdn/Kconfig"

source "drivers/lightnvm/Kconfig"

# input before char - char/joystick depends on it. As does USB.

source "drivers/input/Kconfig"
@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/

obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/
@@ -4029,23 +4029,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (fdc_state[FDC(drive)].rawcmd == 1)
fdc_state[FDC(drive)].rawcmd = 2;

if (mode & (FMODE_READ|FMODE_WRITE)) {
drive_state[drive].last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out;
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
if (!(mode & FMODE_NDELAY)) {
if (mode & (FMODE_READ|FMODE_WRITE)) {
drive_state[drive].last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
&drive_state[drive].flags);
if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out;
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
goto out;
}
res = -EROFS;
if ((mode & FMODE_WRITE) &&
!test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}

res = -EROFS;

if ((mode & FMODE_WRITE) &&
!test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;

mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
@@ -49,6 +49,7 @@

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;

struct nbd_sock {
@@ -113,12 +114,12 @@ struct nbd_device {
struct mutex config_lock;
struct gendisk *disk;
struct workqueue_struct *recv_workq;
struct work_struct remove_work;

struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;

struct completion *destroy_complete;
unsigned long flags;

char *backend;
@@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;

if (disk) {
del_gendisk(disk);
blk_cleanup_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);
}
del_gendisk(disk);
blk_cleanup_disk(disk);
blk_mq_free_tag_set(&nbd->tag_set);

/*
* Place this in the last just before the nbd is freed to
* make sure that the disk and the related kobject are also
* totally removed to avoid duplicate creation of the same
* one.
* Remove from idr after del_gendisk() completes, so if the same ID is
* reused, the following add_disk() will succeed.
*/
if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
complete(nbd->destroy_complete);
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, nbd->index);
mutex_unlock(&nbd_index_mutex);

kfree(nbd);
}

static void nbd_dev_remove_work(struct work_struct *work)
{
nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
}

static void nbd_put(struct nbd_device *nbd)
{
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
if (!refcount_dec_and_test(&nbd->refs))
return;

/* Call del_gendisk() asynchrounously to prevent deadlock */
if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
queue_work(nbd_del_wq, &nbd->remove_work);
else
nbd_dev_remove(nbd);
mutex_unlock(&nbd_index_mutex);
}
}

static int nbd_disconnected(struct nbd_config *config)
@@ -1388,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
struct nbd_config *config = nbd->config;
loff_t bytesize;

switch (cmd) {
case NBD_DISCONNECT:
@@ -1402,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_SIZE:
return nbd_set_size(nbd, arg, config->blksize);
case NBD_SET_SIZE_BLOCKS:
return nbd_set_size(nbd, arg * config->blksize,
config->blksize);
if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
return -EINVAL;
return nbd_set_size(nbd, bytesize, config->blksize);
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1665,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = {
.timeout = nbd_xmit_timeout,
};

static int nbd_dev_add(int index)
static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct nbd_device *nbd;
struct gendisk *disk;
@@ -1683,13 +1690,14 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
nbd->destroy_complete = NULL;
INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
nbd->backend = NULL;

err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
goto out_free_nbd;

mutex_lock(&nbd_index_mutex);
if (index >= 0) {
err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
GFP_KERNEL);
@@ -1700,9 +1708,10 @@ static int nbd_dev_add(int index)
if (err >= 0)
index = err;
}
nbd->index = index;
mutex_unlock(&nbd_index_mutex);
if (err < 0)
goto out_free_tags;
nbd->index = index;

disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
if (IS_ERR(disk)) {
@@ -1726,38 +1735,65 @@ static int nbd_dev_add(int index)

mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
refcount_set(&nbd->refs, 1);
/*
* Start out with a zero references to keep other threads from using
* this device until it is fully initialized.
*/
refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;

/* Too big first_minor can cause duplicate creation of
* sysfs files/links, since first_minor will be truncated to
* byte in __device_add_disk().
*/
disk->first_minor = index << part_shift;
if (disk->first_minor > 0xff) {
err = -EINVAL;
goto out_free_idr;
}

disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
add_disk(disk);

/*
* Now publish the device.
*/
refcount_set(&nbd->refs, refs);
nbd_total_devices++;
return index;
return nbd;

out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
mutex_unlock(&nbd_index_mutex);
out_free_tags:
blk_mq_free_tag_set(&nbd->tag_set);
out_free_nbd:
kfree(nbd);
out:
return err;
return ERR_PTR(err);
}

static int find_free_cb(int id, void *ptr, void *data)
static struct nbd_device *nbd_find_get_unused(void)
{
struct nbd_device *nbd = ptr;
struct nbd_device **found = data;
struct nbd_device *nbd;
int id;

if (!refcount_read(&nbd->config_refs)) {
*found = nbd;
return 1;
lockdep_assert_held(&nbd_index_mutex);

idr_for_each_entry(&nbd_index_idr, nbd, id) {
if (refcount_read(&nbd->config_refs) ||
test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
continue;
if (refcount_inc_not_zero(&nbd->refs))
return nbd;
}
return 0;

return NULL;
}

/* Netlink interface. */
@@ -1806,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)

static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
DECLARE_COMPLETION_ONSTACK(destroy_complete);
struct nbd_device *nbd = NULL;
struct nbd_device *nbd;
struct nbd_config *config;
int index = -1;
int ret;
@@ -1829,56 +1864,30 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
again:
mutex_lock(&nbd_index_mutex);
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
if (ret == 0) {
int new_index;
new_index = nbd_dev_add(-1);
if (new_index < 0) {
mutex_unlock(&nbd_index_mutex);
printk(KERN_ERR "nbd: failed to add new device\n");
return new_index;
}
nbd = idr_find(&nbd_index_idr, new_index);
}
nbd = nbd_find_get_unused();
} else {
nbd = idr_find(&nbd_index_idr, index);
if (!nbd) {
ret = nbd_dev_add(index);
if (ret < 0) {
if (nbd) {
if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
printk(KERN_ERR "nbd: failed to add new device\n");
return ret;
pr_err("nbd: device at index %d is going down\n",
index);
return -EINVAL;
}
nbd = idr_find(&nbd_index_idr, index);
}
}
if (!nbd) {
printk(KERN_ERR "nbd: couldn't find device at index %d\n",
index);
mutex_unlock(&nbd_index_mutex);
return -EINVAL;
}

if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
nbd->destroy_complete = &destroy_complete;
mutex_unlock(&nbd_index_mutex);

/* Wait untill the the nbd stuff is totally destroyed */
wait_for_completion(&destroy_complete);
goto again;
}

if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
if (index == -1)
goto again;
printk(KERN_ERR "nbd: device at index %d is going down\n",
index);
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);

if (!nbd) {
nbd = nbd_dev_add(index, 2);
if (IS_ERR(nbd)) {
pr_err("nbd: failed to add new device\n");
return PTR_ERR(nbd);
}
}

mutex_lock(&nbd->config_lock);
if (refcount_read(&nbd->config_refs)) {
mutex_unlock(&nbd->config_lock);
@@ -2424,16 +2433,21 @@ static int __init nbd_init(void)
if (register_blkdev(NBD_MAJOR, "nbd"))
return -EIO;

nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
if (!nbd_del_wq) {
unregister_blkdev(NBD_MAJOR, "nbd");
return -ENOMEM;
}

if (genl_register_family(&nbd_genl_family)) {
destroy_workqueue(nbd_del_wq);
unregister_blkdev(NBD_MAJOR, "nbd");
return -EINVAL;
}
nbd_dbg_init();

mutex_lock(&nbd_index_mutex);
for (i = 0; i < nbds_max; i++)
nbd_dev_add(i);
mutex_unlock(&nbd_index_mutex);
nbd_dev_add(i, 1);
return 0;
}

@@ -2442,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data)
struct list_head *list = (struct list_head *)data;
struct nbd_device *nbd = ptr;

list_add_tail(&nbd->list, list);
/* Skip nbd that is being removed asynchronously */
if (refcount_read(&nbd->refs))
list_add_tail(&nbd->list, list);

return 0;
}

@@ -2465,6 +2482,9 @@ static void __exit nbd_cleanup(void)
nbd_put(nbd);
}

/* Also wait for nbd_dev_remove_work() completes */
destroy_workqueue(nbd_del_wq);

idr_destroy(&nbd_index_idr);
genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
@@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj,

switch (dev->dev_state) {
case DEV_STATE_INIT:
return snprintf(page, PAGE_SIZE, "init\n");
return sysfs_emit(page, "init\n");
case DEV_STATE_MAPPED:
/* TODO fix cli tool before changing to proper state */
return snprintf(page, PAGE_SIZE, "open\n");
return sysfs_emit(page, "open\n");
case DEV_STATE_MAPPED_DISCONNECTED:
/* TODO fix cli tool before changing to proper state */
return snprintf(page, PAGE_SIZE, "closed\n");
return sysfs_emit(page, "closed\n");
case DEV_STATE_UNMAPPED:
return snprintf(page, PAGE_SIZE, "unmapped\n");
return sysfs_emit(page, "unmapped\n");
default:
return snprintf(page, PAGE_SIZE, "unknown\n");
return sysfs_emit(page, "unknown\n");
}
}

@@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,

dev = container_of(kobj, struct rnbd_clt_dev, kobj);

return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname);
return sysfs_emit(page, "%s\n", dev->pathname);
}

static struct kobj_attribute rnbd_clt_mapping_path_attr =
@@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj,

dev = container_of(kobj, struct rnbd_clt_dev, kobj);

return snprintf(page, PAGE_SIZE, "%s\n",
rnbd_access_mode_str(dev->access_mode));
return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
}

static struct kobj_attribute rnbd_clt_access_mode =
@@ -286,8 +285,8 @@ static struct kobj_attribute rnbd_clt_access_mode =
static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n",
attr->attr.name);
return sysfs_emit(page, "Usage: echo <normal|force> > %s\n",
attr->attr.name);
}

static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj,
@@ -357,9 +356,8 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
return scnprintf(page, PAGE_SIZE,
"Usage: echo <new size in sectors> > %s\n",
attr->attr.name);
return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n",
attr->attr.name);
}

static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
@@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr =
static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n",
attr->attr.name);
return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name);
}

static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj,
@@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr,

dev = container_of(kobj, struct rnbd_clt_dev, kobj);

return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname);
return sysfs_emit(page, "%s\n", dev->sess->sessname);
}

static struct kobj_attribute rnbd_clt_session_attr =
@@ -499,8 +496,8 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
return scnprintf(page, PAGE_SIZE,
"Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
return sysfs_emit(page,
"Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}

@@ -271,7 +271,7 @@ unlock:
*/
if (cpu_q)
*cpup = cpu_q->cpu;
put_cpu_var(sess->cpu_rr);
put_cpu_ptr(sess->cpu_rr);

if (q)
rnbd_clt_dev_requeue(q);
@@ -90,8 +90,8 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,

sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);

return scnprintf(page, PAGE_SIZE, "%d\n",
!(sess_dev->open_flags & FMODE_WRITE));
return sysfs_emit(page, "%d\n",
!(sess_dev->open_flags & FMODE_WRITE));
}

static struct kobj_attribute rnbd_srv_dev_session_ro_attr =
@@ -105,8 +105,8 @@ static ssize_t access_mode_show(struct kobject *kobj,

sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);

return scnprintf(page, PAGE_SIZE, "%s\n",
rnbd_access_mode_str(sess_dev->access_mode));
return sysfs_emit(page, "%s\n",
rnbd_access_mode_str(sess_dev->access_mode));
}

static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr =
@@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,

sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);

return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname);
return sysfs_emit(page, "%s\n", sess_dev->pathname);
}

static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
@@ -128,8 +128,8 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
attr->attr.name);
return sysfs_emit(page, "Usage: echo 1 > %s\n",
attr->attr.name);
}

static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj,
@@ -1092,7 +1092,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = xlbd_reserve_minors(minor, nr_minors);
if (err)
return err;
err = -ENODEV;

memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
@@ -1,44 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Open-Channel SSD NVM configuration
#

menuconfig NVM
	bool "Open-Channel SSD target support (DEPRECATED)"
	depends on BLOCK
	help
	  Say Y here to get to enable Open-channel SSDs.

	  Open-Channel SSDs implement a set of extension to SSDs, that
	  exposes direct access to the underlying non-volatile memory.

	  If you say N, all options in this submenu will be skipped and disabled
	  only do this if you know what you are doing.

	  This code is deprecated and will be removed in Linux 5.15.

if NVM

config NVM_PBLK
	tristate "Physical Block Device Open-Channel SSD target"
	select CRC32
	help
	  Allows an open-channel SSD to be exposed as a block device to the
	  host. The target assumes the device exposes raw flash and must be
	  explicitly managed by the host.

	  Please note the disk format is considered EXPERIMENTAL for now.

if NVM_PBLK

config NVM_PBLK_DEBUG
	bool "PBlk Debug Support"
	default n
	help
	  Enables debug support for pblk. This includes extra checks, more
	  vocal error messages, and extra tracking fields in the pblk sysfs
	  entries.

endif # NVM_PBLK_DEBUG

endif # NVM
@@ -1,11 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open-Channel SSDs.
#

obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_PBLK) += pblk.o
pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
	pblk-write.o pblk-cache.o pblk-read.o \
	pblk-gc.o pblk-recovery.o pblk-map.o \
	pblk-rl.o pblk-sysfs.o
File diff suppressed because it is too large
@ -1,137 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-cache.c - pblk's write cache
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct pblk_w_ctx w_ctx;
|
||||
sector_t lba = pblk_get_lba(bio);
|
||||
unsigned long start_time;
|
||||
unsigned int bpos, pos;
|
||||
int nr_entries = pblk_get_secs(bio);
|
||||
int i, ret;
|
||||
|
||||
start_time = bio_start_io_acct(bio);
|
||||
|
||||
/* Update the write buffer head (mem) with the entries that we can
|
||||
* write. The write in itself cannot fail, so there is no need to
|
||||
* rollback from here on.
|
||||
*/
|
||||
retry:
|
||||
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
|
||||
switch (ret) {
|
||||
case NVM_IO_REQUEUE:
|
||||
io_schedule();
|
||||
goto retry;
|
||||
case NVM_IO_ERR:
|
||||
pblk_pipeline_stop(pblk);
|
||||
bio_io_error(bio);
|
||||
goto out;
|
||||
}
|
||||
|
||||
pblk_ppa_set_empty(&w_ctx.ppa);
|
||||
w_ctx.flags = flags;
|
||||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
w_ctx.flags |= PBLK_FLUSH_ENTRY;
|
||||
pblk_write_kick(pblk);
|
||||
}
|
||||
|
||||
if (unlikely(!bio_has_data(bio)))
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
void *data = bio_data(bio);
|
||||
|
||||
w_ctx.lba = lba + i;
|
||||
|
||||
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
|
||||
pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
|
||||
|
||||
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
|
||||
}
|
||||
|
||||
atomic64_add(nr_entries, &pblk->user_wa);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(nr_entries, &pblk->inflight_writes);
|
||||
atomic_long_add(nr_entries, &pblk->req_writes);
|
||||
#endif
|
||||
|
||||
pblk_rl_inserted(&pblk->rl, nr_entries);
|
||||
|
||||
out:
|
||||
bio_end_io_acct(bio, start_time);
|
||||
pblk_write_should_kick(pblk);
|
||||
|
||||
if (ret == NVM_IO_DONE)
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* On GC the incoming lbas are not necessarily sequential. Also, some of the
|
||||
* lbas might not be valid entries, which are marked as empty by the GC thread
|
||||
*/
|
||||
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
|
||||
{
|
||||
struct pblk_w_ctx w_ctx;
|
||||
unsigned int bpos, pos;
|
||||
void *data = gc_rq->data;
|
||||
int i, valid_entries;
|
||||
|
||||
/* Update the write buffer head (mem) with the entries that we can
|
||||
* write. The write in itself cannot fail, so there is no need to
|
||||
* rollback from here on.
|
||||
*/
|
||||
retry:
|
||||
if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
|
||||
io_schedule();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
w_ctx.flags = PBLK_IOTYPE_GC;
|
||||
pblk_ppa_set_empty(&w_ctx.ppa);
|
||||
|
||||
for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
|
||||
if (gc_rq->lba_list[i] == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
w_ctx.lba = gc_rq->lba_list[i];
|
||||
|
||||
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
|
||||
pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
|
||||
gc_rq->paddr_list[i], pos);
|
||||
|
||||
data += PBLK_EXPOSED_PAGE_SIZE;
|
||||
valid_entries++;
|
||||
}
|
||||
|
||||
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
|
||||
"pblk: inconsistent GC write\n");
|
||||
|
||||
atomic64_add(valid_entries, &pblk->gc_wa);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(valid_entries, &pblk->inflight_writes);
|
||||
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
|
||||
#endif
|
||||
|
||||
pblk_write_should_kick(pblk);
|
||||
return NVM_IO_OK;
|
||||
}
|
File diff suppressed because it is too large
@ -1,726 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-gc.c - pblk's garbage collector
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
#include <linux/delay.h>
|
||||
|
||||
|
||||
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
|
||||
{
|
||||
vfree(gc_rq->data);
|
||||
kfree(gc_rq);
|
||||
}
|
||||
|
||||
static int pblk_gc_write(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_gc_rq *gc_rq, *tgc_rq;
|
||||
LIST_HEAD(w_list);
|
||||
|
||||
spin_lock(&gc->w_lock);
|
||||
if (list_empty(&gc->w_list)) {
|
||||
spin_unlock(&gc->w_lock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
|
||||
gc->w_entries = 0;
|
||||
spin_unlock(&gc->w_lock);
|
||||
|
||||
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
|
||||
pblk_write_gc_to_cache(pblk, gc_rq);
|
||||
list_del(&gc_rq->list);
|
||||
kref_put(&gc_rq->line->ref, pblk_line_put);
|
||||
pblk_gc_free_gc_rq(gc_rq);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pblk_gc_writer_kick(struct pblk_gc *gc)
|
||||
{
|
||||
wake_up_process(gc->gc_writer_ts);
|
||||
}
|
||||
|
||||
void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct list_head *move_list;
|
||||
|
||||
spin_lock(&l_mg->gc_lock);
|
||||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_GC);
|
||||
line->state = PBLK_LINESTATE_CLOSED;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
|
||||
/* We need to reset gc_group in order to ensure that
|
||||
* pblk_line_gc_list will return proper move_list
|
||||
* since right now current line is not on any of the
|
||||
* gc lists.
|
||||
*/
|
||||
line->gc_group = PBLK_LINEGC_NONE;
|
||||
move_list = pblk_line_gc_list(pblk, line);
|
||||
spin_unlock(&line->lock);
|
||||
list_add_tail(&line->list, move_list);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
}
|
||||
|
||||
static void pblk_gc_line_ws(struct work_struct *work)
|
||||
{
|
||||
struct pblk_line_ws *gc_rq_ws = container_of(work,
|
||||
struct pblk_line_ws, ws);
|
||||
struct pblk *pblk = gc_rq_ws->pblk;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line *line = gc_rq_ws->line;
|
||||
struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
|
||||
int ret;
|
||||
|
||||
up(&gc->gc_sem);
|
||||
|
||||
/* Read from GC victim block */
|
||||
ret = pblk_submit_read_gc(pblk, gc_rq);
|
||||
if (ret) {
|
||||
line->w_err_gc->has_gc_err = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!gc_rq->secs_to_gc)
|
||||
goto out;
|
||||
|
||||
retry:
|
||||
spin_lock(&gc->w_lock);
|
||||
if (gc->w_entries >= PBLK_GC_RQ_QD) {
|
||||
spin_unlock(&gc->w_lock);
|
||||
pblk_gc_writer_kick(&pblk->gc);
|
||||
usleep_range(128, 256);
|
||||
goto retry;
|
||||
}
|
||||
gc->w_entries++;
|
||||
list_add_tail(&gc_rq->list, &gc->w_list);
|
||||
spin_unlock(&gc->w_lock);
|
||||
|
||||
pblk_gc_writer_kick(&pblk->gc);
|
||||
|
||||
kfree(gc_rq_ws);
|
||||
return;
|
||||
|
||||
out:
|
||||
pblk_gc_free_gc_rq(gc_rq);
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
kfree(gc_rq_ws);
|
||||
}
|
||||
|
||||
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
|
||||
struct pblk_line *line)
|
||||
{
|
||||
struct line_emeta *emeta_buf;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
unsigned int lba_list_size = lm->emeta_len[2];
|
||||
__le64 *lba_list;
|
||||
int ret;
|
||||
|
||||
emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
|
||||
if (!emeta_buf)
|
||||
return NULL;
|
||||
|
||||
ret = pblk_line_emeta_read(pblk, line, emeta_buf);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "line %d read emeta failed (%d)\n",
|
||||
line->id, ret);
|
||||
kvfree(emeta_buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If this read fails, it means that emeta is corrupted.
|
||||
* For now, leave the line untouched.
|
||||
* TODO: Implement a recovery routine that scans and moves
|
||||
* all sectors on the line.
|
||||
*/
|
||||
|
||||
ret = pblk_recov_check_emeta(pblk, emeta_buf);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "inconsistent emeta (line %d)\n",
|
||||
line->id);
|
||||
kvfree(emeta_buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
|
||||
|
||||
if (lba_list)
|
||||
memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
|
||||
|
||||
kvfree(emeta_buf);
|
||||
|
||||
return lba_list;
|
||||
}
|
||||
|
||||
static void pblk_gc_line_prepare_ws(struct work_struct *work)
|
||||
{
|
||||
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
|
||||
ws);
|
||||
struct pblk *pblk = line_ws->pblk;
|
||||
struct pblk_line *line = line_ws->line;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line_ws *gc_rq_ws;
|
||||
struct pblk_gc_rq *gc_rq;
|
||||
__le64 *lba_list;
|
||||
unsigned long *invalid_bitmap;
|
||||
int sec_left, nr_secs, bit;
|
||||
|
||||
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
if (!invalid_bitmap)
|
||||
goto fail_free_ws;
|
||||
|
||||
if (line->w_err_gc->has_write_err) {
|
||||
lba_list = line->w_err_gc->lba_list;
|
||||
line->w_err_gc->lba_list = NULL;
|
||||
} else {
|
||||
lba_list = get_lba_list_from_emeta(pblk, line);
|
||||
if (!lba_list) {
|
||||
pblk_err(pblk, "could not interpret emeta (line %d)\n",
|
||||
line->id);
|
||||
goto fail_free_invalid_bitmap;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&line->lock);
|
||||
bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
|
||||
sec_left = pblk_line_vsc(line);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
if (sec_left < 0) {
|
||||
pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
|
||||
goto fail_free_lba_list;
|
||||
}
|
||||
|
||||
bit = -1;
|
||||
next_rq:
|
||||
gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
|
||||
if (!gc_rq)
|
||||
goto fail_free_lba_list;
|
||||
|
||||
nr_secs = 0;
|
||||
do {
|
||||
bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
|
||||
bit + 1);
|
||||
if (bit > line->emeta_ssec)
|
||||
break;
|
||||
|
||||
gc_rq->paddr_list[nr_secs] = bit;
|
||||
gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
|
||||
} while (nr_secs < pblk->max_write_pgs);
|
||||
|
||||
if (unlikely(!nr_secs)) {
|
||||
kfree(gc_rq);
|
||||
goto out;
|
||||
}
|
||||
|
||||
gc_rq->nr_secs = nr_secs;
|
||||
gc_rq->line = line;
|
||||
|
||||
gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
|
||||
if (!gc_rq->data)
|
||||
goto fail_free_gc_rq;
|
||||
|
||||
gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
|
||||
if (!gc_rq_ws)
|
||||
goto fail_free_gc_data;
|
||||
|
||||
gc_rq_ws->pblk = pblk;
|
||||
gc_rq_ws->line = line;
|
||||
gc_rq_ws->priv = gc_rq;
|
||||
|
||||
/* The write GC path can be much slower than the read GC one due to
|
||||
* the budget imposed by the rate-limiter. Balance in case that we get
|
||||
* back pressure from the write GC path.
|
||||
*/
|
||||
while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
|
||||
io_schedule();
|
||||
|
||||
kref_get(&line->ref);
|
||||
|
||||
INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
|
||||
queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
|
||||
|
||||
sec_left -= nr_secs;
|
||||
if (sec_left > 0)
|
||||
goto next_rq;
|
||||
|
||||
out:
|
||||
kvfree(lba_list);
|
||||
kfree(line_ws);
|
||||
kfree(invalid_bitmap);
|
||||
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
atomic_dec(&gc->read_inflight_gc);
|
||||
|
||||
return;
|
||||
|
||||
fail_free_gc_data:
|
||||
vfree(gc_rq->data);
|
||||
fail_free_gc_rq:
|
||||
kfree(gc_rq);
|
||||
fail_free_lba_list:
|
||||
kvfree(lba_list);
|
||||
fail_free_invalid_bitmap:
|
||||
kfree(invalid_bitmap);
|
||||
fail_free_ws:
|
||||
kfree(line_ws);
|
||||
|
||||
/* Line goes back to closed state, so we cannot release additional
|
||||
* reference for line, since we do that only when we want to do
|
||||
* gc to free line state transition.
|
||||
*/
|
||||
pblk_put_line_back(pblk, line);
|
||||
atomic_dec(&gc->read_inflight_gc);
|
||||
|
||||
pblk_err(pblk, "failed to GC line %d\n", line->id);
|
||||
}
|
||||
|
||||
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line_ws *line_ws;
|
||||
|
||||
pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);
|
||||
|
||||
line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
|
||||
if (!line_ws)
|
||||
return -ENOMEM;
|
||||
|
||||
line_ws->pblk = pblk;
|
||||
line_ws->line = line;
|
||||
|
||||
atomic_inc(&gc->pipeline_gc);
|
||||
INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
|
||||
queue_work(gc->gc_reader_wq, &line_ws->ws);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pblk_gc_reader_kick(struct pblk_gc *gc)
|
||||
{
|
||||
wake_up_process(gc->gc_reader_ts);
|
||||
}
|
||||
|
||||
static void pblk_gc_kick(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
pblk_gc_writer_kick(gc);
|
||||
pblk_gc_reader_kick(gc);
|
||||
|
||||
/* If we're shutting down GC, let's not start it up again */
|
||||
if (gc->gc_enabled) {
|
||||
wake_up_process(gc->gc_ts);
|
||||
mod_timer(&gc->gc_timer,
|
||||
jiffies + msecs_to_jiffies(GC_TIME_MSECS));
|
||||
}
|
||||
}
|
||||
|
||||
static int pblk_gc_read(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line *line;
|
||||
|
||||
spin_lock(&gc->r_lock);
|
||||
if (list_empty(&gc->r_list)) {
|
||||
spin_unlock(&gc->r_lock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
line = list_first_entry(&gc->r_list, struct pblk_line, list);
|
||||
list_del(&line->list);
|
||||
spin_unlock(&gc->r_lock);
|
||||
|
||||
pblk_gc_kick(pblk);
|
||||
|
||||
if (pblk_gc_line(pblk, line)) {
|
||||
pblk_err(pblk, "failed to GC line %d\n", line->id);
|
||||
/* rollback */
|
||||
spin_lock(&gc->r_lock);
|
||||
list_add_tail(&line->list, &gc->r_list);
|
||||
spin_unlock(&gc->r_lock);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
|
||||
struct list_head *group_list)
|
||||
{
|
||||
struct pblk_line *line, *victim;
|
||||
unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;
|
||||
|
||||
victim = list_first_entry(group_list, struct pblk_line, list);
|
||||
|
||||
list_for_each_entry(line, group_list, list) {
|
||||
if (!atomic_read(&line->sec_to_update))
|
||||
line_vsc = le32_to_cpu(*line->vsc);
|
||||
if (line_vsc < victim_vsc) {
|
||||
victim = line;
|
||||
victim_vsc = le32_to_cpu(*victim->vsc);
|
||||
}
|
||||
}
|
||||
|
||||
if (victim_vsc == ~0x0)
|
||||
return NULL;
|
||||
|
||||
return victim;
|
||||
}
|
||||
|
||||
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
|
||||
{
|
||||
unsigned int nr_blocks_free, nr_blocks_need;
|
||||
unsigned int werr_lines = atomic_read(&rl->werr_lines);
|
||||
|
||||
nr_blocks_need = pblk_rl_high_thrs(rl);
|
||||
nr_blocks_free = pblk_rl_nr_free_blks(rl);
|
||||
|
||||
/* This is not critical, no need to take lock here */
|
||||
return ((werr_lines > 0) ||
|
||||
((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
|
||||
}
|
||||
|
||||
void pblk_gc_free_full_lines(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line *line;
|
||||
|
||||
do {
|
||||
spin_lock(&l_mg->gc_lock);
|
||||
if (list_empty(&l_mg->gc_full_list)) {
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
line = list_first_entry(&l_mg->gc_full_list,
|
||||
struct pblk_line, list);
|
||||
|
||||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
|
||||
line->state = PBLK_LINESTATE_GC;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_del(&line->list);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
|
||||
atomic_inc(&gc->pipeline_gc);
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lines with no valid sectors will be returned to the free list immediately. If
|
||||
* GC is activated - either because the free block count is under the determined
|
||||
* threshold, or because it is being forced from user space - only lines with a
|
||||
* high count of invalid sectors will be recycled.
|
||||
*/
|
||||
static void pblk_gc_run(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct pblk_line *line;
|
||||
struct list_head *group_list;
|
||||
bool run_gc;
|
||||
int read_inflight_gc, gc_group = 0, prev_group = 0;
|
||||
|
||||
pblk_gc_free_full_lines(pblk);
|
||||
|
||||
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
|
||||
if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
|
||||
return;
|
||||
|
||||
next_gc_group:
|
||||
group_list = l_mg->gc_lists[gc_group++];
|
||||
|
||||
do {
|
||||
spin_lock(&l_mg->gc_lock);
|
||||
|
||||
line = pblk_gc_get_victim_line(pblk, group_list);
|
||||
if (!line) {
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
|
||||
line->state = PBLK_LINESTATE_GC;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_del(&line->list);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
|
||||
spin_lock(&gc->r_lock);
|
||||
list_add_tail(&line->list, &gc->r_list);
|
||||
spin_unlock(&gc->r_lock);
|
||||
|
||||
read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
|
||||
pblk_gc_reader_kick(gc);
|
||||
|
||||
prev_group = 1;
|
||||
|
||||
/* No need to queue up more GC lines than we can handle */
|
||||
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
|
||||
if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
|
||||
break;
|
||||
} while (1);
|
||||
|
||||
if (!prev_group && pblk->rl.rb_state > gc_group &&
|
||||
gc_group < PBLK_GC_NR_LISTS)
|
||||
goto next_gc_group;
|
||||
}
|
||||
|
||||
static void pblk_gc_timer(struct timer_list *t)
|
||||
{
|
||||
struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);
|
||||
|
||||
pblk_gc_kick(pblk);
|
||||
}
|
||||
|
||||
static int pblk_gc_ts(void *data)
|
||||
{
|
||||
struct pblk *pblk = data;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
pblk_gc_run(pblk);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
io_schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pblk_gc_writer_ts(void *data)
|
||||
{
|
||||
struct pblk *pblk = data;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (!pblk_gc_write(pblk))
|
||||
continue;
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
io_schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pblk_gc_reader_ts(void *data)
|
||||
{
|
||||
struct pblk *pblk = data;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (!pblk_gc_read(pblk))
|
||||
continue;
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
io_schedule();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
|
||||
atomic_read(&gc->pipeline_gc));
|
||||
#endif
|
||||
|
||||
do {
|
||||
if (!atomic_read(&gc->pipeline_gc))
|
||||
break;
|
||||
|
||||
schedule();
|
||||
} while (1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pblk_gc_start(struct pblk *pblk)
|
||||
{
|
||||
pblk->gc.gc_active = 1;
|
||||
pblk_debug(pblk, "gc start\n");
|
||||
}
|
||||
|
||||
void pblk_gc_should_start(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
if (gc->gc_enabled && !gc->gc_active) {
|
||||
pblk_gc_start(pblk);
|
||||
pblk_gc_kick(pblk);
|
||||
}
|
||||
}
|
||||
|
||||
void pblk_gc_should_stop(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
if (gc->gc_active && !gc->gc_forced)
|
||||
gc->gc_active = 0;
|
||||
}
|
||||
|
||||
void pblk_gc_should_kick(struct pblk *pblk)
|
||||
{
|
||||
pblk_rl_update_rates(&pblk->rl);
|
||||
}
|
||||
|
||||
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
|
||||
int *gc_active)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
spin_lock(&gc->lock);
|
||||
*gc_enabled = gc->gc_enabled;
|
||||
*gc_active = gc->gc_active;
|
||||
spin_unlock(&gc->lock);
|
||||
}
|
||||
|
||||
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
if (force < 0 || force > 1)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&gc->lock);
|
||||
gc->gc_forced = force;
|
||||
|
||||
if (force)
|
||||
gc->gc_enabled = 1;
|
||||
else
|
||||
gc->gc_enabled = 0;
|
||||
spin_unlock(&gc->lock);
|
||||
|
||||
pblk_gc_should_start(pblk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pblk_gc_init(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
int ret;
|
||||
|
||||
gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
|
||||
if (IS_ERR(gc->gc_ts)) {
|
||||
pblk_err(pblk, "could not allocate GC main kthread\n");
|
||||
return PTR_ERR(gc->gc_ts);
|
||||
}
|
||||
|
||||
gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
|
||||
"pblk-gc-writer-ts");
|
||||
if (IS_ERR(gc->gc_writer_ts)) {
|
||||
pblk_err(pblk, "could not allocate GC writer kthread\n");
|
||||
ret = PTR_ERR(gc->gc_writer_ts);
|
||||
goto fail_free_main_kthread;
|
||||
}
|
||||
|
||||
gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
|
||||
"pblk-gc-reader-ts");
|
||||
if (IS_ERR(gc->gc_reader_ts)) {
|
||||
pblk_err(pblk, "could not allocate GC reader kthread\n");
|
||||
ret = PTR_ERR(gc->gc_reader_ts);
|
||||
goto fail_free_writer_kthread;
|
||||
}
|
||||
|
||||
timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
|
||||
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
|
||||
|
||||
gc->gc_active = 0;
|
||||
gc->gc_forced = 0;
|
||||
gc->gc_enabled = 1;
|
||||
gc->w_entries = 0;
|
||||
atomic_set(&gc->read_inflight_gc, 0);
|
||||
atomic_set(&gc->pipeline_gc, 0);
|
||||
|
||||
/* Workqueue that reads valid sectors from a line and submit them to the
|
||||
* GC writer to be recycled.
|
||||
*/
|
||||
gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
|
||||
WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
|
||||
if (!gc->gc_line_reader_wq) {
|
||||
pblk_err(pblk, "could not allocate GC line reader workqueue\n");
|
||||
ret = -ENOMEM;
|
||||
goto fail_free_reader_kthread;
|
||||
}
|
||||
|
||||
/* Workqueue that prepare lines for GC */
|
||||
gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
|
||||
WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
|
||||
if (!gc->gc_reader_wq) {
|
||||
pblk_err(pblk, "could not allocate GC reader workqueue\n");
|
||||
ret = -ENOMEM;
|
||||
goto fail_free_reader_line_wq;
|
||||
}
|
||||
|
||||
spin_lock_init(&gc->lock);
|
||||
spin_lock_init(&gc->w_lock);
|
||||
spin_lock_init(&gc->r_lock);
|
||||
|
||||
sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
|
||||
|
||||
INIT_LIST_HEAD(&gc->w_list);
|
||||
INIT_LIST_HEAD(&gc->r_list);
|
||||
|
||||
return 0;
|
||||
|
||||
fail_free_reader_line_wq:
|
||||
destroy_workqueue(gc->gc_line_reader_wq);
|
||||
fail_free_reader_kthread:
|
||||
kthread_stop(gc->gc_reader_ts);
|
||||
fail_free_writer_kthread:
|
||||
kthread_stop(gc->gc_writer_ts);
|
||||
fail_free_main_kthread:
|
||||
kthread_stop(gc->gc_ts);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void pblk_gc_exit(struct pblk *pblk, bool graceful)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
gc->gc_enabled = 0;
|
||||
del_timer_sync(&gc->gc_timer);
|
||||
gc->gc_active = 0;
|
||||
|
||||
if (gc->gc_ts)
|
||||
kthread_stop(gc->gc_ts);
|
||||
|
||||
if (gc->gc_reader_ts)
|
||||
kthread_stop(gc->gc_reader_ts);
|
||||
|
||||
if (graceful) {
|
||||
flush_workqueue(gc->gc_reader_wq);
|
||||
flush_workqueue(gc->gc_line_reader_wq);
|
||||
}
|
||||
|
||||
destroy_workqueue(gc->gc_reader_wq);
|
||||
destroy_workqueue(gc->gc_line_reader_wq);
|
||||
|
||||
if (gc->gc_writer_ts)
|
||||
kthread_stop(gc->gc_writer_ts);
|
||||
}
|
File diff suppressed because it is too large
@ -1,210 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-map.c - pblk's lba-ppa mapping strategy
|
||||
*
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
||||
struct ppa_addr *ppa_list,
|
||||
unsigned long *lun_bitmap,
|
||||
void *meta_list,
|
||||
unsigned int valid_secs)
|
||||
{
|
||||
struct pblk_line *line = pblk_line_get_data(pblk);
|
||||
struct pblk_emeta *emeta;
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
__le64 *lba_list;
|
||||
u64 paddr;
|
||||
int nr_secs = pblk->min_write_pgs;
|
||||
int i;
|
||||
|
||||
if (!line)
|
||||
return -ENOSPC;
|
||||
|
||||
if (pblk_line_is_full(line)) {
|
||||
struct pblk_line *prev_line = line;
|
||||
|
||||
/* If we cannot allocate a new line, make sure to store metadata
|
||||
* on current line and then fail
|
||||
*/
|
||||
line = pblk_line_replace_data(pblk);
|
||||
pblk_line_close_meta(pblk, prev_line);
|
||||
|
||||
if (!line) {
|
||||
pblk_pipeline_stop(pblk);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
emeta = line->emeta;
|
||||
lba_list = emeta_to_lbas(pblk, emeta->buf);
|
||||
|
||||
paddr = pblk_alloc_page(pblk, line, nr_secs);
|
||||
|
||||
for (i = 0; i < nr_secs; i++, paddr++) {
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
|
||||
/* ppa to be sent to the device */
|
||||
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
|
||||
/* Write context for target bio completion on write buffer. Note
|
||||
* that the write buffer is protected by the sync backpointer,
|
||||
* and a single writer thread have access to each specific entry
|
||||
* at a time. Thus, it is safe to modify the context for the
|
||||
* entry we are setting up for submission without taking any
|
||||
* lock or memory barrier.
|
||||
*/
|
||||
if (i < valid_secs) {
|
||||
kref_get(&line->ref);
|
||||
atomic_inc(&line->sec_to_update);
|
||||
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
|
||||
w_ctx->ppa = ppa_list[i];
|
||||
meta->lba = cpu_to_le64(w_ctx->lba);
|
||||
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
|
||||
if (lba_list[paddr] != addr_empty)
|
||||
line->nr_valid_lbas++;
|
||||
else
|
||||
atomic64_inc(&pblk->pad_wa);
|
||||
} else {
|
||||
lba_list[paddr] = addr_empty;
|
||||
meta->lba = addr_empty;
|
||||
__pblk_map_invalidate(pblk, line, paddr);
|
||||
}
|
||||
}
|
||||
|
||||
pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
|
||||
unsigned long *lun_bitmap, unsigned int valid_secs,
|
||||
unsigned int off)
|
||||
{
|
||||
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
|
||||
void *meta_buffer;
|
||||
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
unsigned int map_secs;
|
||||
int min = pblk->min_write_pgs;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
for (i = off; i < rqd->nr_ppas; i += min) {
|
||||
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
|
||||
meta_buffer = pblk_get_meta(pblk, meta_list, i);
|
||||
|
||||
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
|
||||
lun_bitmap, meta_buffer, map_secs);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
/* only if erase_ppa is set, acquire erase semaphore */
int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
		unsigned int sentry, unsigned long *lun_bitmap,
		unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
	void *meta_buffer;
	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
	struct pblk_line *e_line, *d_line;
	unsigned int map_secs;
	int min = pblk->min_write_pgs;
	int i, erase_lun;
	int ret;

	for (i = 0; i < rqd->nr_ppas; i += min) {
		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
		meta_buffer = pblk_get_meta(pblk, meta_list, i);

		ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
					lun_bitmap, meta_buffer, map_secs);
		if (ret)
			return ret;

		erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);

		/* line can change after page map. We might also be writing the
		 * last line.
		 */
		e_line = pblk_line_get_erase(pblk);
		if (!e_line)
			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
						valid_secs, i + min);

		spin_lock(&e_line->lock);
		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
			set_bit(erase_lun, e_line->erase_bitmap);
			atomic_dec(&e_line->left_eblks);

			*erase_ppa = ppa_list[i];
			erase_ppa->a.blk = e_line->id;
			erase_ppa->a.reserved = 0;

			spin_unlock(&e_line->lock);

			/* Avoid evaluating e_line->left_eblks */
			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
						valid_secs, i + min);
		}
		spin_unlock(&e_line->lock);
	}

	d_line = pblk_line_get_data(pblk);

	/* line can change after page map. We might also be writing the
	 * last line.
	 */
	e_line = pblk_line_get_erase(pblk);
	if (!e_line)
		return -ENOSPC;

	/* Erase blocks that are bad in this line but might not be in next */
	if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
			bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
		int bit = -1;

retry:
		bit = find_next_bit(d_line->blk_bitmap,
						lm->blk_per_line, bit + 1);
		if (bit >= lm->blk_per_line)
			return 0;

		spin_lock(&e_line->lock);
		if (test_bit(bit, e_line->erase_bitmap)) {
			spin_unlock(&e_line->lock);
			goto retry;
		}
		spin_unlock(&e_line->lock);

		set_bit(bit, e_line->erase_bitmap);
		atomic_dec(&e_line->left_eblks);
		*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
		erase_ppa->a.blk = e_line->id;
	}

	return 0;
}
@@ -1,858 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
*
|
||||
* Based upon the circular ringbuffer.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-rb.c - pblk's write buffer
|
||||
*/
|
||||
|
||||
#include <linux/circ_buf.h>
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
static DECLARE_RWSEM(pblk_rb_lock);
|
||||
|
||||
static void pblk_rb_data_free(struct pblk_rb *rb)
|
||||
{
|
||||
struct pblk_rb_pages *p, *t;
|
||||
|
||||
down_write(&pblk_rb_lock);
|
||||
list_for_each_entry_safe(p, t, &rb->pages, list) {
|
||||
free_pages((unsigned long)page_address(p->pages), p->order);
|
||||
list_del(&p->list);
|
||||
kfree(p);
|
||||
}
|
||||
up_write(&pblk_rb_lock);
|
||||
}
|
||||
|
||||
void pblk_rb_free(struct pblk_rb *rb)
|
||||
{
|
||||
pblk_rb_data_free(rb);
|
||||
vfree(rb->entries);
|
||||
}
|
||||
|
||||
/*
|
||||
* pblk_rb_calculate_size -- calculate the size of the write buffer
|
||||
*/
|
||||
static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
|
||||
unsigned int threshold)
|
||||
{
|
||||
unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA));
|
||||
unsigned int max_sz = max(thr_sz, nr_entries);
|
||||
unsigned int max_io;
|
||||
|
||||
/* Alloc a write buffer that can (i) fit at least two split bios
|
||||
* (considering max I/O size NVM_MAX_VLBA), and (ii) guarantee that the
|
||||
* threshold will be respected
|
||||
*/
|
||||
max_io = (1 << max((int)(get_count_order(max_sz)),
|
||||
(int)(get_count_order(NVM_MAX_VLBA << 1))));
|
||||
if ((threshold + NVM_MAX_VLBA) >= max_io)
|
||||
max_io <<= 1;
|
||||
|
||||
return max_io;
|
||||
}
|
||||
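/*
 * Editor's illustrative sketch, not part of the original pblk source: a
 * userspace model of pblk_rb_calculate_size() above. get_count_order(n)
 * in the kernel returns ceil(log2(n)); NVM_MAX_VLBA is assumed to be 64
 * here purely for the example numbers.
 */
#include <stdio.h>

#define EX_MAX_VLBA 64u	/* stand-in for NVM_MAX_VLBA (assumed value) */

static unsigned int ceil_log2(unsigned int n)	/* like get_count_order() */
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

static unsigned int rb_size(unsigned int nr_entries, unsigned int threshold)
{
	unsigned int thr_sz = 1u << ceil_log2(threshold + EX_MAX_VLBA);
	unsigned int max_sz = thr_sz > nr_entries ? thr_sz : nr_entries;
	unsigned int a = ceil_log2(max_sz);
	unsigned int b = ceil_log2(EX_MAX_VLBA << 1);
	unsigned int max_io = 1u << (a > b ? a : b);

	if (threshold + EX_MAX_VLBA >= max_io)
		max_io <<= 1;
	return max_io;
}

int main(void)
{
	/* 1000 requested entries with threshold 128 round up to 1024 */
	printf("%u\n", rb_size(1000, 128));
	return 0;
}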
|
||||
/*
|
||||
* Initialize ring buffer. The data and metadata buffers must be previously
|
||||
* allocated and their size must be a power of two
|
||||
* (Documentation/core-api/circular-buffers.rst)
|
||||
*/
|
||||
int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
|
||||
unsigned int seg_size)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_rb_entry *entries;
|
||||
unsigned int init_entry = 0;
|
||||
unsigned int max_order = MAX_ORDER - 1;
|
||||
unsigned int power_size, power_seg_sz;
|
||||
unsigned int alloc_order, order, iter;
|
||||
unsigned int nr_entries;
|
||||
|
||||
nr_entries = pblk_rb_calculate_size(size, threshold);
|
||||
entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
|
||||
if (!entries)
|
||||
return -ENOMEM;
|
||||
|
||||
power_size = get_count_order(nr_entries);
|
||||
power_seg_sz = get_count_order(seg_size);
|
||||
|
||||
down_write(&pblk_rb_lock);
|
||||
rb->entries = entries;
|
||||
rb->seg_size = (1 << power_seg_sz);
|
||||
rb->nr_entries = (1 << power_size);
|
||||
rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
|
||||
rb->back_thres = threshold;
|
||||
rb->flush_point = EMPTY_ENTRY;
|
||||
|
||||
spin_lock_init(&rb->w_lock);
|
||||
spin_lock_init(&rb->s_lock);
|
||||
|
||||
INIT_LIST_HEAD(&rb->pages);
|
||||
|
||||
alloc_order = power_size;
|
||||
if (alloc_order >= max_order) {
|
||||
order = max_order;
|
||||
iter = (1 << (alloc_order - max_order));
|
||||
} else {
|
||||
order = alloc_order;
|
||||
iter = 1;
|
||||
}
|
||||
|
||||
do {
|
||||
struct pblk_rb_entry *entry;
|
||||
struct pblk_rb_pages *page_set;
|
||||
void *kaddr;
|
||||
unsigned long set_size;
|
||||
int i;
|
||||
|
||||
page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
|
||||
if (!page_set) {
|
||||
up_write(&pblk_rb_lock);
|
||||
vfree(entries);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
page_set->order = order;
|
||||
page_set->pages = alloc_pages(GFP_KERNEL, order);
|
||||
if (!page_set->pages) {
|
||||
kfree(page_set);
|
||||
pblk_rb_data_free(rb);
|
||||
up_write(&pblk_rb_lock);
|
||||
vfree(entries);
|
||||
return -ENOMEM;
|
||||
}
|
||||
kaddr = page_address(page_set->pages);
|
||||
|
||||
entry = &rb->entries[init_entry];
|
||||
entry->data = kaddr;
|
||||
entry->cacheline = pblk_cacheline_to_addr(init_entry++);
|
||||
entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
|
||||
|
||||
set_size = (1 << order);
|
||||
for (i = 1; i < set_size; i++) {
|
||||
entry = &rb->entries[init_entry];
|
||||
entry->cacheline = pblk_cacheline_to_addr(init_entry++);
|
||||
entry->data = kaddr + (i * rb->seg_size);
|
||||
entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
|
||||
bio_list_init(&entry->w_ctx.bios);
|
||||
}
|
||||
|
||||
list_add_tail(&page_set->list, &rb->pages);
|
||||
iter--;
|
||||
} while (iter > 0);
|
||||
up_write(&pblk_rb_lock);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_set(&rb->inflight_flush_point, 0);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Initialize rate-limiter, which controls access to the write buffer
|
||||
* by user and GC I/O
|
||||
*/
|
||||
pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void clean_wctx(struct pblk_w_ctx *w_ctx)
|
||||
{
|
||||
int flags;
|
||||
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
|
||||
"pblk: overwriting unsubmitted data\n");
|
||||
|
||||
/* Release flags on context. Protect from writes and reads */
|
||||
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
|
||||
pblk_ppa_set_empty(&w_ctx->ppa);
|
||||
w_ctx->lba = ADDR_EMPTY;
|
||||
}
|
||||
|
||||
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
|
||||
#define pblk_rb_ring_space(rb, head, tail, size) \
|
||||
(CIRC_SPACE(head, tail, size))
|
||||
|
||||
/*
|
||||
* Buffer space is calculated with respect to the back pointer signaling
|
||||
* synchronized entries to the media.
|
||||
*/
|
||||
static unsigned int pblk_rb_space(struct pblk_rb *rb)
|
||||
{
|
||||
unsigned int mem = READ_ONCE(rb->mem);
|
||||
unsigned int sync = READ_ONCE(rb->sync);
|
||||
|
||||
return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
|
||||
unsigned int nr_entries)
|
||||
{
|
||||
return (p + nr_entries) & (rb->nr_entries - 1);
|
||||
}
|
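/*
 * Editor's illustrative sketch, not part of the original pblk source: the
 * wrap above relies on rb->nr_entries being a power of two, so masking
 * with (nr_entries - 1) is equivalent to a modulo. Standalone model:
 */
#include <assert.h>

static unsigned int wrap(unsigned int p, unsigned int step,
			 unsigned int ring_size /* must be a power of two */)
{
	return (p + step) & (ring_size - 1);
}

int main(void)
{
	/* ring of 8 entries: advancing 3 from position 6 wraps to 1 */
	assert(wrap(6, 3, 8) == 1);
	assert(wrap(6, 3, 8) == (6 + 3) % 8);	/* same as plain modulo */
	return 0;
}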
||||
|
||||
/*
|
||||
* Buffer count is calculated with respect to the submission entry signaling the
|
||||
* entries that are available to send to the media
|
||||
*/
|
||||
unsigned int pblk_rb_read_count(struct pblk_rb *rb)
|
||||
{
|
||||
unsigned int mem = READ_ONCE(rb->mem);
|
||||
unsigned int subm = READ_ONCE(rb->subm);
|
||||
|
||||
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
|
||||
{
|
||||
unsigned int mem = READ_ONCE(rb->mem);
|
||||
unsigned int sync = READ_ONCE(rb->sync);
|
||||
|
||||
return pblk_rb_ring_count(mem, sync, rb->nr_entries);
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
|
||||
{
|
||||
unsigned int subm;
|
||||
|
||||
subm = READ_ONCE(rb->subm);
|
||||
/* Commit read means updating submission pointer */
|
||||
smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
|
||||
|
||||
return subm;
|
||||
}
|
||||
|
||||
static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_line *line;
|
||||
struct pblk_rb_entry *entry;
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
unsigned int user_io = 0, gc_io = 0;
|
||||
unsigned int i;
|
||||
int flags;
|
||||
|
||||
for (i = 0; i < to_update; i++) {
|
||||
entry = &rb->entries[rb->l2p_update];
|
||||
w_ctx = &entry->w_ctx;
|
||||
|
||||
flags = READ_ONCE(entry->w_ctx.flags);
|
||||
if (flags & PBLK_IOTYPE_USER)
|
||||
user_io++;
|
||||
else if (flags & PBLK_IOTYPE_GC)
|
||||
gc_io++;
|
||||
else
|
||||
WARN(1, "pblk: unknown IO type\n");
|
||||
|
||||
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
|
||||
entry->cacheline);
|
||||
|
||||
line = pblk_ppa_to_line(pblk, w_ctx->ppa);
|
||||
atomic_dec(&line->sec_to_update);
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
clean_wctx(w_ctx);
|
||||
rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
|
||||
}
|
||||
|
||||
pblk_rl_out(&pblk->rl, user_io, gc_io);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* When we move the l2p_update pointer, we update the l2p table - lookups will
|
||||
* point to the physical address instead of to the cacheline in the write buffer
|
||||
* from this moment on.
|
||||
*/
|
||||
static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
unsigned int mem, unsigned int sync)
|
||||
{
|
||||
unsigned int space, count;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&rb->w_lock);
|
||||
|
||||
/* Update l2p only as buffer entries are being overwritten */
|
||||
space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
|
||||
if (space > nr_entries)
|
||||
goto out;
|
||||
|
||||
count = nr_entries - space;
|
||||
/* l2p_update used exclusively under rb->w_lock */
|
||||
ret = __pblk_rb_update_l2p(rb, count);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the l2p entry for all sectors stored on the write buffer. This means
|
||||
* that all future lookups to the l2p table will point to a device address, not
|
||||
* to the cacheline in the write buffer.
|
||||
*/
|
||||
void pblk_rb_sync_l2p(struct pblk_rb *rb)
|
||||
{
|
||||
unsigned int sync;
|
||||
unsigned int to_update;
|
||||
|
||||
spin_lock(&rb->w_lock);
|
||||
|
||||
/* Protect from reads and writes */
|
||||
sync = smp_load_acquire(&rb->sync);
|
||||
|
||||
to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
|
||||
__pblk_rb_update_l2p(rb, to_update);
|
||||
|
||||
spin_unlock(&rb->w_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write @nr_entries to ring buffer from @data buffer if there is enough space.
|
||||
* Typically, 4KB data chunks coming from a bio will be copied to the ring
|
||||
* buffer, thus the write will fail if not all incoming data can be copied.
|
||||
*
|
||||
*/
|
||||
static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
|
||||
struct pblk_w_ctx w_ctx,
|
||||
struct pblk_rb_entry *entry)
|
||||
{
|
||||
memcpy(entry->data, data, rb->seg_size);
|
||||
|
||||
entry->w_ctx.lba = w_ctx.lba;
|
||||
entry->w_ctx.ppa = w_ctx.ppa;
|
||||
}
|
||||
|
||||
void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
|
||||
struct pblk_w_ctx w_ctx, unsigned int ring_pos)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_rb_entry *entry;
|
||||
int flags;
|
||||
|
||||
entry = &rb->entries[ring_pos];
|
||||
flags = READ_ONCE(entry->w_ctx.flags);
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
/* Caller must guarantee that the entry is free */
|
||||
BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
|
||||
#endif
|
||||
|
||||
__pblk_rb_write_entry(rb, data, w_ctx, entry);
|
||||
|
||||
pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
|
||||
flags = w_ctx.flags | PBLK_WRITTEN_DATA;
|
||||
|
||||
/* Release flags on write context. Protect from writes */
|
||||
smp_store_release(&entry->w_ctx.flags, flags);
|
||||
}
|
||||
|
||||
void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
|
||||
struct pblk_w_ctx w_ctx, struct pblk_line *line,
|
||||
u64 paddr, unsigned int ring_pos)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_rb_entry *entry;
|
||||
int flags;
|
||||
|
||||
entry = &rb->entries[ring_pos];
|
||||
flags = READ_ONCE(entry->w_ctx.flags);
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
/* Caller must guarantee that the entry is free */
|
||||
BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
|
||||
#endif
|
||||
|
||||
__pblk_rb_write_entry(rb, data, w_ctx, entry);
|
||||
|
||||
if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
|
||||
entry->w_ctx.lba = ADDR_EMPTY;
|
||||
|
||||
flags = w_ctx.flags | PBLK_WRITTEN_DATA;
|
||||
|
||||
/* Release flags on write context. Protect from writes */
|
||||
smp_store_release(&entry->w_ctx.flags, flags);
|
||||
}
|
||||
|
||||
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
|
||||
unsigned int pos)
|
||||
{
|
||||
struct pblk_rb_entry *entry;
|
||||
unsigned int sync, flush_point;
|
||||
|
||||
pblk_rb_sync_init(rb, NULL);
|
||||
sync = READ_ONCE(rb->sync);
|
||||
|
||||
if (pos == sync) {
|
||||
pblk_rb_sync_end(rb, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_inc(&rb->inflight_flush_point);
|
||||
#endif
|
||||
|
||||
flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
|
||||
entry = &rb->entries[flush_point];
|
||||
|
||||
/* Protect flush points */
|
||||
smp_store_release(&rb->flush_point, flush_point);
|
||||
|
||||
if (bio)
|
||||
bio_list_add(&entry->w_ctx.bios, bio);
|
||||
|
||||
pblk_rb_sync_end(rb, NULL);
|
||||
|
||||
return bio ? 1 : 0;
|
||||
}
|
||||
|
||||
static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
unsigned int *pos)
|
||||
{
|
||||
unsigned int mem;
|
||||
unsigned int sync;
|
||||
unsigned int threshold;
|
||||
|
||||
sync = READ_ONCE(rb->sync);
|
||||
mem = READ_ONCE(rb->mem);
|
||||
|
||||
threshold = nr_entries + rb->back_thres;
|
||||
|
||||
if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
|
||||
return 0;
|
||||
|
||||
if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
|
||||
return 0;
|
||||
|
||||
*pos = mem;
|
||||
|
||||
return 1;
|
||||
}
|
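/*
 * Editor's illustrative sketch, not part of the original pblk source: the
 * admission check above only allows a write when, after reserving
 * rb->back_thres entries, there is still room for the new entries. A
 * standalone model using the same CIRC_SPACE() arithmetic on a
 * power-of-two ring:
 */
#include <assert.h>

static int may_write(unsigned int mem, unsigned int sync,
		     unsigned int ring_size, unsigned int nr_entries,
		     unsigned int back_thres)
{
	/* CIRC_SPACE(mem, sync, ring_size) */
	unsigned int space = (sync - mem - 1) & (ring_size - 1);

	return space >= nr_entries + back_thres;
}

int main(void)
{
	/* 1024-entry ring with 800 entries occupied (sync = 100, mem = 900)
	 * leaves 223 free slots; with back_thres = 64 a 128-entry write is
	 * admitted (128 + 64 <= 223) but a 200-entry write is not.
	 */
	assert(may_write(900, 100, 1024, 128, 64));
	assert(!may_write(900, 100, 1024, 200, 64));
	return 0;
}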
||||
|
||||
static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
unsigned int *pos)
|
||||
{
|
||||
if (!__pblk_rb_may_write(rb, nr_entries, pos))
|
||||
return 0;
|
||||
|
||||
/* Protect from read count */
|
||||
smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
|
||||
return 1;
|
||||
}
|
||||
|
||||
void pblk_rb_flush(struct pblk_rb *rb)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
unsigned int mem = READ_ONCE(rb->mem);
|
||||
|
||||
if (pblk_rb_flush_point_set(rb, NULL, mem))
|
||||
return;
|
||||
|
||||
pblk_write_kick(pblk);
|
||||
}
|
||||
|
||||
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
unsigned int *pos, struct bio *bio,
|
||||
int *io_ret)
|
||||
{
|
||||
unsigned int mem;
|
||||
|
||||
if (!__pblk_rb_may_write(rb, nr_entries, pos))
|
||||
return 0;
|
||||
|
||||
mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
|
||||
*io_ret = NVM_IO_DONE;
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
|
||||
atomic64_inc(&pblk->nr_flush);
|
||||
if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
|
||||
*io_ret = NVM_IO_OK;
|
||||
}
|
||||
|
||||
/* Protect from read count */
|
||||
smp_store_release(&rb->mem, mem);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Atomically check that (i) there is space on the write buffer for the
|
||||
* incoming I/O, and (ii) the current I/O type has enough budget in the write
|
||||
* buffer (rate-limiter).
|
||||
*/
|
||||
int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
|
||||
unsigned int nr_entries, unsigned int *pos)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
int io_ret;
|
||||
|
||||
spin_lock(&rb->w_lock);
|
||||
io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
|
||||
if (io_ret) {
|
||||
spin_unlock(&rb->w_lock);
|
||||
return io_ret;
|
||||
}
|
||||
|
||||
if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
|
||||
spin_unlock(&rb->w_lock);
|
||||
return NVM_IO_REQUEUE;
|
||||
}
|
||||
|
||||
pblk_rl_user_in(&pblk->rl, nr_entries);
|
||||
spin_unlock(&rb->w_lock);
|
||||
|
||||
return io_ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look at pblk_rb_may_write_user comment
|
||||
*/
|
||||
int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
unsigned int *pos)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
|
||||
spin_lock(&rb->w_lock);
|
||||
if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
|
||||
spin_unlock(&rb->w_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!pblk_rb_may_write(rb, nr_entries, pos)) {
|
||||
spin_unlock(&rb->w_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
pblk_rl_gc_in(&pblk->rl, nr_entries);
|
||||
spin_unlock(&rb->w_lock);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read available entries on rb and add them to the given bio. To avoid a memory
|
||||
* copy, a page reference to the write buffer is added to the bio instead.
|
||||
*
|
||||
* This function is used by the write thread to form the write bio that will
|
||||
* persist data on the write buffer to the media.
|
||||
*/
|
||||
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
|
||||
unsigned int pos, unsigned int nr_entries,
|
||||
unsigned int count)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct request_queue *q = pblk->dev->q;
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio = rqd->bio;
|
||||
struct pblk_rb_entry *entry;
|
||||
struct page *page;
|
||||
unsigned int pad = 0, to_read = nr_entries;
|
||||
unsigned int i;
|
||||
int flags;
|
||||
|
||||
if (count < nr_entries) {
|
||||
pad = nr_entries - count;
|
||||
to_read = count;
|
||||
}
|
||||
|
||||
/* Add space for packed metadata if in use */
|
||||
pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
|
||||
|
||||
c_ctx->sentry = pos;
|
||||
c_ctx->nr_valid = to_read;
|
||||
c_ctx->nr_padded = pad;
|
||||
|
||||
for (i = 0; i < to_read; i++) {
|
||||
entry = &rb->entries[pos];
|
||||
|
||||
/* A write has been allowed into the buffer, but data is still
|
||||
* being copied to it. It is ok to busy wait.
|
||||
*/
|
||||
try:
|
||||
flags = READ_ONCE(entry->w_ctx.flags);
|
||||
if (!(flags & PBLK_WRITTEN_DATA)) {
|
||||
io_schedule();
|
||||
goto try;
|
||||
}
|
||||
|
||||
page = virt_to_page(entry->data);
|
||||
if (!page) {
|
||||
pblk_err(pblk, "could not allocate write bio page\n");
|
||||
flags &= ~PBLK_WRITTEN_DATA;
|
||||
flags |= PBLK_SUBMITTED_ENTRY;
|
||||
/* Release flags on context. Protect from writes */
|
||||
smp_store_release(&entry->w_ctx.flags, flags);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
|
||||
rb->seg_size) {
|
||||
pblk_err(pblk, "could not add page to write bio\n");
|
||||
flags &= ~PBLK_WRITTEN_DATA;
|
||||
flags |= PBLK_SUBMITTED_ENTRY;
|
||||
/* Release flags on context. Protect from writes */
|
||||
smp_store_release(&entry->w_ctx.flags, flags);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
flags &= ~PBLK_WRITTEN_DATA;
|
||||
flags |= PBLK_SUBMITTED_ENTRY;
|
||||
|
||||
/* Release flags on context. Protect from writes */
|
||||
smp_store_release(&entry->w_ctx.flags, flags);
|
||||
|
||||
pos = pblk_rb_ptr_wrap(rb, pos, 1);
|
||||
}
|
||||
|
||||
if (pad) {
|
||||
if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
|
||||
pblk_err(pblk, "could not pad page in write bio\n");
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
if (pad < pblk->min_write_pgs)
|
||||
atomic64_inc(&pblk->pad_dist[pad - 1]);
|
||||
else
|
||||
pblk_warn(pblk, "padding more than min. sectors\n");
|
||||
|
||||
atomic64_add(pad, &pblk->pad_wa);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(pad, &pblk->padded_writes);
|
||||
#endif
|
||||
|
||||
return NVM_IO_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy to bio only if the lba matches the one on the given cache entry.
|
||||
* Otherwise, it means that the entry has been overwritten, and the bio should
|
||||
* be directed to disk.
|
||||
*/
|
||||
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
|
||||
struct ppa_addr ppa)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_rb_entry *entry;
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
struct ppa_addr l2p_ppa;
|
||||
u64 pos = pblk_addr_to_cacheline(ppa);
|
||||
void *data;
|
||||
int flags;
|
||||
int ret = 1;
|
||||
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
/* Caller must ensure that the access will not cause an overflow */
|
||||
BUG_ON(pos >= rb->nr_entries);
|
||||
#endif
|
||||
entry = &rb->entries[pos];
|
||||
w_ctx = &entry->w_ctx;
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
|
||||
spin_lock(&rb->w_lock);
|
||||
spin_lock(&pblk->trans_lock);
|
||||
l2p_ppa = pblk_trans_map_get(pblk, lba);
|
||||
spin_unlock(&pblk->trans_lock);
|
||||
|
||||
/* Check if the entry has been overwritten or is scheduled to be */
|
||||
if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
|
||||
flags & PBLK_WRITABLE_ENTRY) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
data = bio_data(bio);
|
||||
memcpy(data, entry->data, rb->seg_size);
|
||||
|
||||
out:
|
||||
spin_unlock(&rb->w_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
|
||||
{
|
||||
unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
|
||||
|
||||
return &rb->entries[entry].w_ctx;
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
|
||||
__acquires(&rb->s_lock)
|
||||
{
|
||||
if (flags)
|
||||
spin_lock_irqsave(&rb->s_lock, *flags);
|
||||
else
|
||||
spin_lock_irq(&rb->s_lock);
|
||||
|
||||
return rb->sync;
|
||||
}
|
||||
|
||||
void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
|
||||
__releases(&rb->s_lock)
|
||||
{
|
||||
lockdep_assert_held(&rb->s_lock);
|
||||
|
||||
if (flags)
|
||||
spin_unlock_irqrestore(&rb->s_lock, *flags);
|
||||
else
|
||||
spin_unlock_irq(&rb->s_lock);
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
|
||||
{
|
||||
unsigned int sync, flush_point;
|
||||
lockdep_assert_held(&rb->s_lock);
|
||||
|
||||
sync = READ_ONCE(rb->sync);
|
||||
flush_point = READ_ONCE(rb->flush_point);
|
||||
|
||||
if (flush_point != EMPTY_ENTRY) {
|
||||
unsigned int secs_to_flush;
|
||||
|
||||
secs_to_flush = pblk_rb_ring_count(flush_point, sync,
|
||||
rb->nr_entries);
|
||||
if (secs_to_flush < nr_entries) {
|
||||
/* Protect flush points */
|
||||
smp_store_release(&rb->flush_point, EMPTY_ENTRY);
|
||||
}
|
||||
}
|
||||
|
||||
sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);
|
||||
|
||||
/* Protect from counts */
|
||||
smp_store_release(&rb->sync, sync);
|
||||
|
||||
return sync;
|
||||
}
|
||||
|
||||
/* Calculate how many sectors to submit up to the current flush point. */
|
||||
unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
|
||||
{
|
||||
unsigned int subm, sync, flush_point;
|
||||
unsigned int submitted, to_flush;
|
||||
|
||||
/* Protect flush points */
|
||||
flush_point = smp_load_acquire(&rb->flush_point);
|
||||
if (flush_point == EMPTY_ENTRY)
|
||||
return 0;
|
||||
|
||||
/* Protect syncs */
|
||||
sync = smp_load_acquire(&rb->sync);
|
||||
|
||||
subm = READ_ONCE(rb->subm);
|
||||
submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);
|
||||
|
||||
/* The sync point itself counts as a sector to sync */
|
||||
to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;
|
||||
|
||||
return (submitted < to_flush) ? (to_flush - submitted) : 0;
|
||||
}
|
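/*
 * Editor's worked example, not part of the original pblk source, for
 * pblk_rb_flush_point_count() above, using the same CIRC_CNT arithmetic:
 */
static unsigned int flush_count_example(void)
{
	unsigned int size = 1024, sync = 10, subm = 14, flush_point = 29;
	unsigned int submitted = (subm - sync) & (size - 1);		  /* 4  */
	unsigned int to_flush = ((flush_point - sync) & (size - 1)) + 1; /* 20 */

	/* 16 more sectors must be submitted to cover the flush point,
	 * which itself counts as a sector to sync.
	 */
	return (submitted < to_flush) ? (to_flush - submitted) : 0;
}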
||||
|
||||
int pblk_rb_tear_down_check(struct pblk_rb *rb)
|
||||
{
|
||||
struct pblk_rb_entry *entry;
|
||||
int i;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&rb->w_lock);
|
||||
spin_lock_irq(&rb->s_lock);
|
||||
|
||||
if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
|
||||
(rb->sync == rb->l2p_update) &&
|
||||
(rb->flush_point == EMPTY_ENTRY)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!rb->entries) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < rb->nr_entries; i++) {
|
||||
entry = &rb->entries[i];
|
||||
|
||||
if (!entry->data) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&rb->s_lock);
|
||||
spin_unlock(&rb->w_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
|
||||
{
|
||||
return (pos & (rb->nr_entries - 1));
|
||||
}
|
||||
|
||||
int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
|
||||
{
|
||||
return (pos >= rb->nr_entries);
|
||||
}
|
||||
|
||||
ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
|
||||
{
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
struct pblk_c_ctx *c;
|
||||
ssize_t offset;
|
||||
int queued_entries = 0;
|
||||
|
||||
spin_lock_irq(&rb->s_lock);
|
||||
list_for_each_entry(c, &pblk->compl_list, list)
|
||||
queued_entries++;
|
||||
spin_unlock_irq(&rb->s_lock);
|
||||
|
||||
if (rb->flush_point != EMPTY_ENTRY)
|
||||
offset = scnprintf(buf, PAGE_SIZE,
|
||||
"%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
|
||||
rb->nr_entries,
|
||||
rb->mem,
|
||||
rb->subm,
|
||||
rb->sync,
|
||||
rb->l2p_update,
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_read(&rb->inflight_flush_point),
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
rb->flush_point,
|
||||
pblk_rb_read_count(rb),
|
||||
pblk_rb_space(rb),
|
||||
pblk_rb_flush_point_count(rb),
|
||||
queued_entries);
|
||||
else
|
||||
offset = scnprintf(buf, PAGE_SIZE,
|
||||
"%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
|
||||
rb->nr_entries,
|
||||
rb->mem,
|
||||
rb->subm,
|
||||
rb->sync,
|
||||
rb->l2p_update,
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_read(&rb->inflight_flush_point),
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
pblk_rb_read_count(rb),
|
||||
pblk_rb_space(rb),
|
||||
pblk_rb_flush_point_count(rb),
|
||||
queued_entries);
|
||||
|
||||
return offset;
|
||||
}
|
@@ -1,474 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-read.c - pblk's read path
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
/*
|
||||
* There is no guarantee that the value read from cache has not been updated and
|
||||
* resides at another location in the cache. We guarantee though that if the
|
||||
* value is read from the cache, it belongs to the mapped lba. In order to
|
||||
* guarantee an order between writes and reads, a flush must be
|
||||
* issued.
|
||||
*/
|
||||
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
|
||||
sector_t lba, struct ppa_addr ppa)
|
||||
{
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
/* Callers must ensure that the ppa points to a cache address */
|
||||
BUG_ON(pblk_ppa_empty(ppa));
|
||||
BUG_ON(!pblk_addr_in_cache(ppa));
|
||||
#endif
|
||||
|
||||
return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
|
||||
}
|
||||
|
||||
static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct bio *bio, sector_t blba,
|
||||
bool *from_cache)
|
||||
{
|
||||
void *meta_list = rqd->meta_list;
|
||||
int nr_secs, i;
|
||||
|
||||
retry:
|
||||
nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
|
||||
from_cache);
|
||||
|
||||
if (!*from_cache)
|
||||
goto end;
|
||||
|
||||
for (i = 0; i < nr_secs; i++) {
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
|
||||
sector_t lba = blba + i;
|
||||
|
||||
if (pblk_ppa_empty(rqd->ppa_list[i])) {
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
|
||||
meta->lba = addr_empty;
|
||||
} else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
|
||||
/*
|
||||
* Try to read from write buffer. The address is later
|
||||
* checked on the write buffer to prevent retrieving
|
||||
* overwritten data.
|
||||
*/
|
||||
if (!pblk_read_from_cache(pblk, bio, lba,
|
||||
rqd->ppa_list[i])) {
|
||||
if (i == 0) {
|
||||
/*
|
||||
* We didn't call bio_advance()
|
||||
* yet, so we can just retry.
|
||||
*/
|
||||
goto retry;
|
||||
} else {
|
||||
/*
|
||||
* We already called bio_advance(),
* so we cannot retry and we need
* to quit this function in order
* to allow the caller to handle the bio
|
||||
* splitting in the current sector
|
||||
* position.
|
||||
*/
|
||||
nr_secs = i;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
meta->lba = cpu_to_le64(lba);
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_inc(&pblk->cache_reads);
|
||||
#endif
|
||||
}
|
||||
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
|
||||
}
|
||||
|
||||
end:
|
||||
if (pblk_io_aligned(pblk, nr_secs))
|
||||
rqd->is_seq = 1;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(nr_secs, &pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
return nr_secs;
|
||||
}
|
||||
|
||||
|
||||
static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
sector_t blba)
|
||||
{
|
||||
void *meta_list = rqd->meta_list;
|
||||
int nr_lbas = rqd->nr_ppas;
|
||||
int i;
|
||||
|
||||
if (!pblk_is_oob_meta_supported(pblk))
|
||||
return;
|
||||
|
||||
for (i = 0; i < nr_lbas; i++) {
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
|
||||
u64 lba = le64_to_cpu(meta->lba);
|
||||
|
||||
if (lba == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
if (lba != blba + i) {
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
|
||||
print_ppa(pblk, &ppa_list[i], "seq", i);
|
||||
#endif
|
||||
pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
|
||||
lba, (u64)blba + i);
|
||||
WARN_ON(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There can be holes in the lba list.
|
||||
*/
|
||||
static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
u64 *lba_list, int nr_lbas)
|
||||
{
|
||||
void *meta_lba_list = rqd->meta_list;
|
||||
int i, j;
|
||||
|
||||
if (!pblk_is_oob_meta_supported(pblk))
|
||||
return;
|
||||
|
||||
for (i = 0, j = 0; i < nr_lbas; i++) {
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk,
|
||||
meta_lba_list, j);
|
||||
u64 lba = lba_list[i];
|
||||
u64 meta_lba;
|
||||
|
||||
if (lba == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
meta_lba = le64_to_cpu(meta->lba);
|
||||
|
||||
if (lba != meta_lba) {
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
|
||||
print_ppa(pblk, &ppa_list[j], "rnd", j);
|
||||
#endif
|
||||
pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
|
||||
meta_lba, lba);
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
j++;
|
||||
}
|
||||
|
||||
WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
|
||||
}
|
||||
|
||||
static void pblk_end_user_read(struct bio *bio, int error)
|
||||
{
|
||||
if (error && error != NVM_RSP_WARN_HIGHECC)
|
||||
bio_io_error(bio);
|
||||
else
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
bool put_line)
|
||||
{
|
||||
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *int_bio = rqd->bio;
|
||||
unsigned long start_time = r_ctx->start_time;
|
||||
|
||||
bio_end_io_acct(int_bio, start_time);
|
||||
|
||||
if (rqd->error)
|
||||
pblk_log_read_err(pblk, rqd);
|
||||
|
||||
pblk_read_check_seq(pblk, rqd, r_ctx->lba);
|
||||
bio_put(int_bio);
|
||||
|
||||
if (put_line)
|
||||
pblk_rq_to_line_put(pblk, rqd);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
|
||||
atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
pblk_free_rqd(pblk, rqd, PBLK_READ);
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
}
|
||||
|
||||
static void pblk_end_io_read(struct nvm_rq *rqd)
|
||||
{
|
||||
struct pblk *pblk = rqd->private;
|
||||
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio = (struct bio *)r_ctx->private;
|
||||
|
||||
pblk_end_user_read(bio, rqd->error);
|
||||
__pblk_end_io_read(pblk, rqd, true);
|
||||
}
|
||||
|
||||
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
|
||||
sector_t lba, bool *from_cache)
|
||||
{
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
|
||||
struct ppa_addr ppa;
|
||||
|
||||
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_inc(&pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
retry:
|
||||
if (pblk_ppa_empty(ppa)) {
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
|
||||
meta->lba = addr_empty;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Try to read from write buffer. The address is later checked on the
|
||||
* write buffer to prevent retrieving overwritten data.
|
||||
*/
|
||||
if (pblk_addr_in_cache(ppa)) {
|
||||
if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
|
||||
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
meta->lba = cpu_to_le64(lba);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_inc(&pblk->cache_reads);
|
||||
#endif
|
||||
} else {
|
||||
rqd->ppa_addr = ppa;
|
||||
}
|
||||
}
|
||||
|
||||
void pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
||||
{
|
||||
sector_t blba = pblk_get_lba(bio);
|
||||
unsigned int nr_secs = pblk_get_secs(bio);
|
||||
bool from_cache;
|
||||
struct pblk_g_ctx *r_ctx;
|
||||
struct nvm_rq *rqd;
|
||||
struct bio *int_bio, *split_bio;
|
||||
unsigned long start_time;
|
||||
|
||||
start_time = bio_start_io_acct(bio);
|
||||
|
||||
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
|
||||
|
||||
rqd->opcode = NVM_OP_PREAD;
|
||||
rqd->nr_ppas = nr_secs;
|
||||
rqd->private = pblk;
|
||||
rqd->end_io = pblk_end_io_read;
|
||||
|
||||
r_ctx = nvm_rq_to_pdu(rqd);
|
||||
r_ctx->start_time = start_time;
|
||||
r_ctx->lba = blba;
|
||||
|
||||
if (pblk_alloc_rqd_meta(pblk, rqd)) {
|
||||
bio_io_error(bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_READ);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Clone read bio to deal internally with:
|
||||
* -read errors when reading from drive
|
||||
* -bio_advance() calls during cache reads
|
||||
*/
|
||||
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
|
||||
|
||||
if (nr_secs > 1)
|
||||
nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
|
||||
&from_cache);
|
||||
else
|
||||
pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
|
||||
|
||||
split_retry:
|
||||
r_ctx->private = bio; /* original bio */
|
||||
rqd->bio = int_bio; /* internal bio */
|
||||
|
||||
if (from_cache && nr_secs == rqd->nr_ppas) {
|
||||
/* All data was read from cache, we can complete the IO. */
|
||||
pblk_end_user_read(bio, 0);
|
||||
atomic_inc(&pblk->inflight_io);
|
||||
__pblk_end_io_read(pblk, rqd, false);
|
||||
} else if (nr_secs != rqd->nr_ppas) {
|
||||
/* The read bio request could be partially filled by the write
|
||||
* buffer, but there are some holes that need to be read from
|
||||
* the drive. In order to handle this, we will use block layer
|
||||
* mechanism to split this request in to smaller ones and make
|
||||
* a chain of it.
|
||||
*/
|
||||
split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
|
||||
&pblk_bio_set);
|
||||
bio_chain(split_bio, bio);
|
||||
submit_bio_noacct(bio);
|
||||
|
||||
/* New bio contains first N sectors of the previous one, so
|
||||
* we can continue to use existing rqd, but we need to shrink
|
||||
* the number of PPAs in it. The new bio is also guaranteed to
* contain only either data from cache or from drive, never a
* mix of them.
|
||||
*/
|
||||
bio = split_bio;
|
||||
rqd->nr_ppas = nr_secs;
|
||||
if (rqd->nr_ppas == 1)
|
||||
rqd->ppa_addr = rqd->ppa_list[0];
|
||||
|
||||
/* Recreate int_bio - existing might have some needed internal
|
||||
* fields modified already.
|
||||
*/
|
||||
bio_put(int_bio);
|
||||
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
|
||||
goto split_retry;
|
||||
} else if (pblk_submit_io(pblk, rqd, NULL)) {
|
||||
/* Submitting IO to drive failed, let's report an error */
|
||||
rqd->error = -ENODEV;
|
||||
pblk_end_io_read(rqd);
|
||||
}
|
||||
}
|
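/*
 * Editor's illustrative sketch, not part of the original pblk source: the
 * split_retry loop above keeps carving the leading run of sectors that can
 * be served from one place (write buffer or drive) off the front of the
 * request, using bio_split()/bio_chain(), and re-evaluates the remainder.
 * A standalone userspace model of that shape, with sectors represented as
 * a char array ('c' = cache, 'd' = drive):
 */
#include <stdio.h>
#include <string.h>

static size_t leading_run(const char *secs, size_t n)
{
	size_t i = 1;

	while (i < n && secs[i] == secs[0])
		i++;
	return i;
}

int main(void)
{
	const char *secs = "cccddcc";
	size_t off = 0, len = strlen(secs);

	while (off < len) {
		size_t run = leading_run(secs + off, len - off);

		printf("serve %zu sectors from %s\n", run,
		       secs[off] == 'c' ? "cache" : "drive");
		off += run;	/* the rest of the request is handled next */
	}
	return 0;
}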
||||
|
||||
static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_line *line, u64 *lba_list,
|
||||
u64 *paddr_list_gc, unsigned int nr_secs)
|
||||
{
|
||||
struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA];
|
||||
struct ppa_addr ppa_gc;
|
||||
int valid_secs = 0;
|
||||
int i;
|
||||
|
||||
pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs);
|
||||
|
||||
for (i = 0; i < nr_secs; i++) {
|
||||
if (lba_list[i] == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id);
|
||||
if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) {
|
||||
paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY;
|
||||
continue;
|
||||
}
|
||||
|
||||
rqd->ppa_list[valid_secs++] = ppa_list_l2p[i];
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(valid_secs, &pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
return valid_secs;
|
||||
}
|
||||
|
||||
static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_line *line, sector_t lba,
|
||||
u64 paddr_gc)
|
||||
{
|
||||
struct ppa_addr ppa_l2p, ppa_gc;
|
||||
int valid_secs = 0;
|
||||
|
||||
if (lba == ADDR_EMPTY)
|
||||
goto out;
|
||||
|
||||
/* logic error: lba out-of-bounds */
|
||||
if (lba >= pblk->capacity) {
|
||||
WARN(1, "pblk: read lba out of bounds\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&pblk->trans_lock);
|
||||
ppa_l2p = pblk_trans_map_get(pblk, lba);
|
||||
spin_unlock(&pblk->trans_lock);
|
||||
|
||||
ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id);
|
||||
if (!pblk_ppa_comp(ppa_l2p, ppa_gc))
|
||||
goto out;
|
||||
|
||||
rqd->ppa_addr = ppa_l2p;
|
||||
valid_secs = 1;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_inc(&pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
out:
|
||||
return valid_secs;
|
||||
}
|
||||
|
||||
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
|
||||
{
|
||||
struct nvm_rq rqd;
|
||||
int ret = NVM_IO_OK;
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
ret = pblk_alloc_rqd_meta(pblk, &rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (gc_rq->nr_secs > 1) {
|
||||
gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line,
|
||||
gc_rq->lba_list,
|
||||
gc_rq->paddr_list,
|
||||
gc_rq->nr_secs);
|
||||
if (gc_rq->secs_to_gc == 1)
|
||||
rqd.ppa_addr = rqd.ppa_list[0];
|
||||
} else {
|
||||
gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line,
|
||||
gc_rq->lba_list[0],
|
||||
gc_rq->paddr_list[0]);
|
||||
}
|
||||
|
||||
if (!(gc_rq->secs_to_gc))
|
||||
goto out;
|
||||
|
||||
rqd.opcode = NVM_OP_PREAD;
|
||||
rqd.nr_ppas = gc_rq->secs_to_gc;
|
||||
|
||||
if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
|
||||
ret = -EIO;
|
||||
goto err_free_dma;
|
||||
}
|
||||
|
||||
pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
atomic_long_inc(&pblk->read_failed_gc);
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
pblk_print_failed_rqd(pblk, &rqd, rqd.error);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads);
|
||||
atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads);
|
||||
atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
|
||||
#endif
|
||||
|
||||
out:
|
||||
pblk_free_rqd_meta(pblk, &rqd);
|
||||
return ret;
|
||||
|
||||
err_free_dma:
|
||||
pblk_free_rqd_meta(pblk, &rqd);
|
||||
return ret;
|
||||
}
|
@@ -1,874 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial: Javier Gonzalez <javier@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-recovery.c - pblk's recovery path
|
||||
*
|
||||
* The L2P recovery path is single-threaded, as the L2P table is updated in order
|
||||
* following the line sequence ID.
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
|
||||
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
|
||||
{
|
||||
u32 crc;
|
||||
|
||||
crc = pblk_calc_emeta_crc(pblk, emeta_buf);
|
||||
if (le32_to_cpu(emeta_buf->crc) != crc)
|
||||
return 1;
|
||||
|
||||
if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_emeta *emeta = line->emeta;
|
||||
struct line_emeta *emeta_buf = emeta->buf;
|
||||
__le64 *lba_list;
|
||||
u64 data_start, data_end;
|
||||
u64 nr_valid_lbas, nr_lbas = 0;
|
||||
u64 i;
|
||||
|
||||
lba_list = emeta_to_lbas(pblk, emeta_buf);
|
||||
if (!lba_list)
|
||||
return 1;
|
||||
|
||||
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
|
||||
data_end = line->emeta_ssec;
|
||||
nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
|
||||
|
||||
for (i = data_start; i < data_end; i++) {
|
||||
struct ppa_addr ppa;
|
||||
int pos;
|
||||
|
||||
ppa = addr_to_gen_ppa(pblk, i, line->id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
/* Do not update bad blocks */
|
||||
if (test_bit(pos, line->blk_bitmap))
|
||||
continue;
|
||||
|
||||
if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
|
||||
spin_lock(&line->lock);
|
||||
if (test_and_set_bit(i, line->invalid_bitmap))
|
||||
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
|
||||
else
|
||||
le32_add_cpu(line->vsc, -1);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
|
||||
nr_lbas++;
|
||||
}
|
||||
|
||||
if (nr_valid_lbas != nr_lbas)
|
||||
pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
|
||||
line->id, nr_valid_lbas, nr_lbas);
|
||||
|
||||
line->left_msecs = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
|
||||
u64 written_secs)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < written_secs; i += pblk->min_write_pgs)
|
||||
__pblk_alloc_page(pblk, line, pblk->min_write_pgs);
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
if (written_secs > line->left_msecs) {
|
||||
/*
|
||||
* We have all data sectors written
|
||||
* and some emeta sectors written too.
|
||||
*/
|
||||
line->left_msecs = 0;
|
||||
} else {
|
||||
/* We have only some data sectors written. */
|
||||
line->left_msecs -= written_secs;
|
||||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
}
|
||||
|
||||
static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
|
||||
u64 written_secs = 0;
|
||||
int valid_chunks = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++) {
|
||||
struct nvm_chk_meta *chunk = &line->chks[i];
|
||||
|
||||
if (chunk->state & NVM_CHK_ST_OFFLINE)
|
||||
continue;
|
||||
|
||||
written_secs += chunk->wp;
|
||||
valid_chunks++;
|
||||
}
|
||||
|
||||
if (lm->blk_per_line - nr_bb != valid_chunks)
|
||||
pblk_err(pblk, "recovery line %d is bad\n", line->id);
|
||||
|
||||
pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
|
||||
|
||||
return written_secs;
|
||||
}
|
||||
|
||||
struct pblk_recov_alloc {
|
||||
struct ppa_addr *ppa_list;
|
||||
void *meta_list;
|
||||
struct nvm_rq *rqd;
|
||||
void *data;
|
||||
dma_addr_t dma_ppa_list;
|
||||
dma_addr_t dma_meta_list;
|
||||
};
|
||||
|
||||
static void pblk_recov_complete(struct kref *ref)
|
||||
{
|
||||
struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
|
||||
|
||||
complete(&pad_rq->wait);
|
||||
}
|
||||
|
||||
static void pblk_end_io_recov(struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
struct pblk_pad_rq *pad_rq = rqd->private;
|
||||
struct pblk *pblk = pad_rq->pblk;
|
||||
|
||||
pblk_up_chunk(pblk, ppa_list[0]);
|
||||
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
kref_put(&pad_rq->ref, pblk_recov_complete);
|
||||
}
|
||||
|
||||
/* pad line using line bitmap. */
|
||||
static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
|
||||
int left_ppas)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
void *meta_list;
|
||||
struct pblk_pad_rq *pad_rq;
|
||||
struct nvm_rq *rqd;
|
||||
struct ppa_addr *ppa_list;
|
||||
void *data;
|
||||
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
|
||||
u64 w_ptr = line->cur_sec;
|
||||
int left_line_ppas, rq_ppas;
|
||||
int i, j;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&line->lock);
|
||||
left_line_ppas = line->left_msecs;
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
|
||||
if (!pad_rq)
|
||||
return -ENOMEM;
|
||||
|
||||
data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
|
||||
if (!data) {
|
||||
ret = -ENOMEM;
|
||||
goto free_rq;
|
||||
}
|
||||
|
||||
pad_rq->pblk = pblk;
|
||||
init_completion(&pad_rq->wait);
|
||||
kref_init(&pad_rq->ref);
|
||||
|
||||
next_pad_rq:
|
||||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
|
||||
if (rq_ppas < pblk->min_write_pgs) {
|
||||
pblk_err(pblk, "corrupted pad line %d\n", line->id);
|
||||
goto fail_complete;
|
||||
}
|
||||
|
||||
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
|
||||
|
||||
ret = pblk_alloc_rqd_meta(pblk, rqd);
|
||||
if (ret) {
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
|
||||
goto fail_complete;
|
||||
}
|
||||
|
||||
rqd->bio = NULL;
|
||||
rqd->opcode = NVM_OP_PWRITE;
|
||||
rqd->is_seq = 1;
|
||||
rqd->nr_ppas = rq_ppas;
|
||||
rqd->end_io = pblk_end_io_recov;
|
||||
rqd->private = pad_rq;
|
||||
|
||||
ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
meta_list = rqd->meta_list;
|
||||
|
||||
for (i = 0; i < rqd->nr_ppas; ) {
|
||||
struct ppa_addr ppa;
|
||||
int pos;
|
||||
|
||||
w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
|
||||
ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
while (test_bit(pos, line->blk_bitmap)) {
|
||||
w_ptr += pblk->min_write_pgs;
|
||||
ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
}
|
||||
|
||||
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
|
||||
struct ppa_addr dev_ppa;
|
||||
struct pblk_sec_meta *meta;
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
|
||||
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
|
||||
|
||||
pblk_map_invalidate(pblk, dev_ppa);
|
||||
lba_list[w_ptr] = addr_empty;
|
||||
meta = pblk_get_meta(pblk, meta_list, i);
|
||||
meta->lba = addr_empty;
|
||||
ppa_list[i] = dev_ppa;
|
||||
}
|
||||
}
|
||||
|
||||
kref_get(&pad_rq->ref);
|
||||
pblk_down_chunk(pblk, ppa_list[0]);
|
||||
|
||||
ret = pblk_submit_io(pblk, rqd, data);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "I/O submission failed: %d\n", ret);
|
||||
pblk_up_chunk(pblk, ppa_list[0]);
|
||||
kref_put(&pad_rq->ref, pblk_recov_complete);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
|
||||
goto fail_complete;
|
||||
}
|
||||
|
||||
left_line_ppas -= rq_ppas;
|
||||
left_ppas -= rq_ppas;
|
||||
if (left_ppas && left_line_ppas)
|
||||
goto next_pad_rq;
|
||||
|
||||
fail_complete:
|
||||
kref_put(&pad_rq->ref, pblk_recov_complete);
|
||||
wait_for_completion(&pad_rq->wait);
|
||||
|
||||
if (!pblk_line_is_full(line))
|
||||
pblk_err(pblk, "corrupted padded line: %d\n", line->id);
|
||||
|
||||
vfree(data);
|
||||
free_rq:
|
||||
kfree(pad_rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;
|
||||
|
||||
return (distance > line->left_msecs) ? line->left_msecs : distance;
|
||||
}
|
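/*
 * Editor's worked example, not part of the original pblk source, for
 * pblk_pad_distance() above, with made-up geometry: mw_cunits = 8 cache
 * units, all_luns = 4 and ws_opt = 16 sectors give a pad distance of
 * 8 * 4 * 16 = 512 sectors, clamped to the line's remaining left_msecs
 * when the line has fewer than 512 sectors left.
 */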
||||
|
||||
/* Return a chunk belonging to a line by stripe(write order) index */
|
||||
static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
|
||||
struct pblk_line *line,
|
||||
int index)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_lun *rlun;
|
||||
struct ppa_addr ppa;
|
||||
int pos;
|
||||
|
||||
rlun = &pblk->luns[index];
|
||||
ppa = rlun->bppa;
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
return &line->chks[pos];
|
||||
}
|
||||
|
||||
static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
|
||||
struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int blk_in_line = lm->blk_per_line;
|
||||
struct nvm_chk_meta *chunk;
|
||||
u64 max_wp, min_wp;
|
||||
int i;
|
||||
|
||||
i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
|
||||
|
||||
/* If there is one or zero good chunks in the line,
|
||||
* the write pointers can't be unbalanced.
|
||||
*/
|
||||
if (i >= (blk_in_line - 1))
|
||||
return 0;
|
||||
|
||||
chunk = pblk_get_stripe_chunk(pblk, line, i);
|
||||
max_wp = chunk->wp;
|
||||
if (max_wp > pblk->max_write_pgs)
|
||||
min_wp = max_wp - pblk->max_write_pgs;
|
||||
else
|
||||
min_wp = 0;
|
||||
|
||||
i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
|
||||
while (i < blk_in_line) {
|
||||
chunk = pblk_get_stripe_chunk(pblk, line, i);
|
||||
if (chunk->wp > max_wp || chunk->wp < min_wp)
|
||||
return 1;
|
||||
|
||||
i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
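/*
 * Editor's worked example, not part of the original pblk source, for
 * pblk_line_wps_are_unbalanced() above: with max_write_pgs = 64 and the
 * first good chunk's write pointer at 4096, the allowed window is
 * [4032, 4096]; a later chunk at wp = 4100 or wp = 3900 marks the line's
 * write pointers as unbalanced, while wp = 4040 does not.
 */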
||||
|
||||
static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
|
||||
struct pblk_recov_alloc p)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct ppa_addr *ppa_list;
|
||||
void *meta_list;
|
||||
struct nvm_rq *rqd;
|
||||
void *data;
|
||||
dma_addr_t dma_ppa_list, dma_meta_list;
|
||||
__le64 *lba_list;
|
||||
u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
|
||||
bool padded = false;
|
||||
int rq_ppas;
|
||||
int i, j;
|
||||
int ret;
|
||||
u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
|
||||
|
||||
if (pblk_line_wps_are_unbalanced(pblk, line))
|
||||
pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
|
||||
|
||||
ppa_list = p.ppa_list;
|
||||
meta_list = p.meta_list;
|
||||
rqd = p.rqd;
|
||||
data = p.data;
|
||||
dma_ppa_list = p.dma_ppa_list;
|
||||
dma_meta_list = p.dma_meta_list;
|
||||
|
||||
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
|
||||
|
||||
next_rq:
|
||||
memset(rqd, 0, pblk_g_rq_size);
|
||||
|
||||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
|
||||
if (!rq_ppas)
|
||||
rq_ppas = pblk->min_write_pgs;
|
||||
|
||||
retry_rq:
|
||||
rqd->bio = NULL;
|
||||
rqd->opcode = NVM_OP_PREAD;
|
||||
rqd->meta_list = meta_list;
|
||||
rqd->nr_ppas = rq_ppas;
|
||||
rqd->ppa_list = ppa_list;
|
||||
rqd->dma_ppa_list = dma_ppa_list;
|
||||
rqd->dma_meta_list = dma_meta_list;
|
||||
ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
|
||||
if (pblk_io_aligned(pblk, rq_ppas))
|
||||
rqd->is_seq = 1;
|
||||
|
||||
for (i = 0; i < rqd->nr_ppas; ) {
|
||||
struct ppa_addr ppa;
|
||||
int pos;
|
||||
|
||||
ppa = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
while (test_bit(pos, line->blk_bitmap)) {
|
||||
paddr += pblk->min_write_pgs;
|
||||
ppa = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
}
|
||||
|
||||
for (j = 0; j < pblk->min_write_pgs; j++, i++)
|
||||
ppa_list[i] =
|
||||
addr_to_gen_ppa(pblk, paddr + j, line->id);
|
||||
}
|
||||
|
||||
ret = pblk_submit_io_sync(pblk, rqd, data);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "I/O submission failed: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
/* If a read fails, do a best effort by padding the line and retrying */
|
||||
if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
|
||||
int pad_distance, ret;
|
||||
|
||||
if (padded) {
|
||||
pblk_log_read_err(pblk, rqd);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
pad_distance = pblk_pad_distance(pblk, line);
|
||||
ret = pblk_recov_pad_line(pblk, line, pad_distance);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
padded = true;
|
||||
goto retry_rq;
|
||||
}
|
||||
|
||||
pblk_get_packed_meta(pblk, rqd);
|
||||
|
||||
for (i = 0; i < rqd->nr_ppas; i++) {
|
||||
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
|
||||
u64 lba = le64_to_cpu(meta->lba);
|
||||
|
||||
lba_list[paddr++] = cpu_to_le64(lba);
|
||||
|
||||
if (lba == ADDR_EMPTY || lba >= pblk->capacity)
|
||||
continue;
|
||||
|
||||
line->nr_valid_lbas++;
|
||||
pblk_update_map(pblk, lba, ppa_list[i]);
|
||||
}
|
||||
|
||||
left_ppas -= rq_ppas;
|
||||
if (left_ppas > 0)
|
||||
goto next_rq;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
WARN_ON(padded && !pblk_line_is_full(line));
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Scan line for lbas on out of bound area */
static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_rq *rqd;
	struct ppa_addr *ppa_list;
	void *meta_list;
	struct pblk_recov_alloc p;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int ret = 0;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);

	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
	if (!data) {
		ret = -ENOMEM;
		goto free_meta_list;
	}

	rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
	memset(rqd, 0, pblk_g_rq_size);

	p.ppa_list = ppa_list;
	p.meta_list = meta_list;
	p.rqd = rqd;
	p.data = data;
	p.dma_ppa_list = dma_ppa_list;
	p.dma_meta_list = dma_meta_list;

	ret = pblk_recov_scan_oob(pblk, line, p);
	if (ret) {
		pblk_err(pblk, "could not recover L2P from OOB\n");
		goto out;
	}

	if (pblk_line_is_full(line))
		pblk_line_recov_close(pblk, line);

out:
	mempool_free(rqd, &pblk->r_rq_pool);
	kfree(data);
free_meta_list:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);

	return ret;
}

/* Insert lines ordered by sequence number (seq_num) on list */
static void pblk_recov_line_add_ordered(struct list_head *head,
					struct pblk_line *line)
{
	struct pblk_line *t = NULL;

	list_for_each_entry(t, head, list)
		if (t->seq_nr > line->seq_nr)
			break;

	__list_add(&line->list, t->list.prev, &t->list);
}

static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int emeta_secs;
	u64 emeta_start;
	struct ppa_addr ppa;
	int pos;

	emeta_secs = lm->emeta_sec[0];
	emeta_start = lm->sec_per_line;

	while (emeta_secs) {
		emeta_start--;
		ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);
		if (!test_bit(pos, line->blk_bitmap))
			emeta_secs--;
	}

	return emeta_start;
}

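emeta lives at the tail of the line, so the function above walks backwards from sec_per_line and only counts a sector towards the emeta budget when its parallel unit is not marked bad in blk_bitmap. As a hedged illustration only, here is a small stand-alone user-space sketch of that backward walk; the geometry numbers are invented, and the sector-to-unit mapping is reduced to a simple modulo, whereas the real code goes through addr_to_gen_ppa() and pblk_ppa_to_pos():

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	/* toy line: 8 parallel units, 32 sectors per unit, unit 5 is bad */
	const int nr_luns = 8;
	const int sec_per_line = 8 * 32;
	const int emeta_secs_needed = 16;
	bool blk_is_bad[8] = { false, false, false, false, false,
			       true,  false, false };

	/* walk backwards like pblk_line_emeta_start(); a sector only counts
	 * towards emeta if its unit (here simply sector % nr_luns) is good */
	int emeta_secs = emeta_secs_needed;
	int emeta_start = sec_per_line;

	while (emeta_secs) {
		emeta_start--;
		if (!blk_is_bad[emeta_start % nr_luns])
			emeta_secs--;
	}

	printf("emeta starts at sector %d of %d\n", emeta_start, sec_per_line);
	return 0;
}

With one bad unit out of eight, the 16 emeta sectors start at sector 238 instead of 240: two extra sectors are walked over to make up for the sectors that land on the bad block.
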
static int pblk_recov_check_line_version(struct pblk *pblk,
					 struct line_emeta *emeta)
{
	struct line_header *header = &emeta->header;

	if (header->version_major != EMETA_VERSION_MAJOR) {
		pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
			 header->version_major, EMETA_VERSION_MAJOR);
		return 1;
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	if (header->version_minor > EMETA_VERSION_MINOR)
		pblk_info(pblk, "newer line minor version found: %d\n",
			  header->version_minor);
#endif

	return 0;
}

static void pblk_recov_wa_counters(struct pblk *pblk,
				   struct line_emeta *emeta)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct line_header *header = &emeta->header;
	struct wa_counters *wa = emeta_to_wa(lm, emeta);

	/* WA counters were introduced in emeta version 0.2 */
	if (header->version_major > 0 || header->version_minor >= 2) {
		u64 user = le64_to_cpu(wa->user);
		u64 pad = le64_to_cpu(wa->pad);
		u64 gc = le64_to_cpu(wa->gc);

		atomic64_set(&pblk->user_wa, user);
		atomic64_set(&pblk->pad_wa, pad);
		atomic64_set(&pblk->gc_wa, gc);

		pblk->user_rst_wa = user;
		pblk->pad_rst_wa = pad;
		pblk->gc_rst_wa = gc;
	}
}

static int pblk_line_was_written(struct pblk_line *line,
|
||||
struct pblk *pblk)
|
||||
{
|
||||
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct nvm_chk_meta *chunk;
|
||||
struct ppa_addr bppa;
|
||||
int smeta_blk;
|
||||
|
||||
if (line->state == PBLK_LINESTATE_BAD)
|
||||
return 0;
|
||||
|
||||
smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
|
||||
if (smeta_blk >= lm->blk_per_line)
|
||||
return 0;
|
||||
|
||||
bppa = pblk->luns[smeta_blk].bppa;
|
||||
chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
|
||||
|
||||
if (chunk->state & NVM_CHK_ST_CLOSED ||
|
||||
(chunk->state & NVM_CHK_ST_OPEN
|
||||
&& chunk->wp >= lm->smeta_sec))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++)
|
||||
if (line->chks[i].state & NVM_CHK_ST_OPEN)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line *line, *tline, *data_line = NULL;
|
||||
struct pblk_smeta *smeta;
|
||||
struct pblk_emeta *emeta;
|
||||
struct line_smeta *smeta_buf;
|
||||
int found_lines = 0, recovered_lines = 0, open_lines = 0;
|
||||
int is_next = 0;
|
||||
int meta_line;
|
||||
int i, valid_uuid = 0;
|
||||
LIST_HEAD(recov_list);
|
||||
|
||||
/* TODO: Implement FTL snapshot */
|
||||
|
||||
/* Scan recovery - takes place when FTL snapshot fails */
|
||||
spin_lock(&l_mg->free_lock);
|
||||
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
|
||||
set_bit(meta_line, &l_mg->meta_bitmap);
|
||||
smeta = l_mg->sline_meta[meta_line];
|
||||
emeta = l_mg->eline_meta[meta_line];
|
||||
smeta_buf = (struct line_smeta *)smeta;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
/* Order data lines using their sequence number */
|
||||
for (i = 0; i < l_mg->nr_lines; i++) {
|
||||
u32 crc;
|
||||
|
||||
line = &pblk->lines[i];
|
||||
|
||||
memset(smeta, 0, lm->smeta_len);
|
||||
line->smeta = smeta;
|
||||
line->lun_bitmap = ((void *)(smeta_buf)) +
|
||||
sizeof(struct line_smeta);
|
||||
|
||||
if (!pblk_line_was_written(line, pblk))
|
||||
continue;
|
||||
|
||||
/* Lines that cannot be read are assumed as not written here */
|
||||
if (pblk_line_smeta_read(pblk, line))
|
||||
continue;
|
||||
|
||||
crc = pblk_calc_smeta_crc(pblk, smeta_buf);
|
||||
if (le32_to_cpu(smeta_buf->crc) != crc)
|
||||
continue;
|
||||
|
||||
if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
|
||||
continue;
|
||||
|
||||
if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
|
||||
pblk_err(pblk, "found incompatible line version %u\n",
|
||||
smeta_buf->header.version_major);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/* The first valid instance uuid is used for initialization */
|
||||
if (!valid_uuid) {
|
||||
import_guid(&pblk->instance_uuid, smeta_buf->header.uuid);
|
||||
valid_uuid = 1;
|
||||
}
|
||||
|
||||
if (!guid_equal(&pblk->instance_uuid,
|
||||
(guid_t *)&smeta_buf->header.uuid)) {
|
||||
pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
|
||||
i);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Update line metadata */
|
||||
spin_lock(&line->lock);
|
||||
line->id = le32_to_cpu(smeta_buf->header.id);
|
||||
line->type = le16_to_cpu(smeta_buf->header.type);
|
||||
line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
/* Update general metadata */
|
||||
spin_lock(&l_mg->free_lock);
|
||||
if (line->seq_nr >= l_mg->d_seq_nr)
|
||||
l_mg->d_seq_nr = line->seq_nr + 1;
|
||||
l_mg->nr_free_lines--;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
if (pblk_line_recov_alloc(pblk, line))
|
||||
goto out;
|
||||
|
||||
pblk_recov_line_add_ordered(&recov_list, line);
|
||||
found_lines++;
|
||||
pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
|
||||
line->id, smeta_buf->seq_nr);
|
||||
}
|
||||
|
||||
if (!found_lines) {
|
||||
guid_gen(&pblk->instance_uuid);
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
WARN_ON_ONCE(!test_and_clear_bit(meta_line,
|
||||
&l_mg->meta_bitmap));
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify closed blocks and recover this portion of L2P table*/
|
||||
list_for_each_entry_safe(line, tline, &recov_list, list) {
|
||||
recovered_lines++;
|
||||
|
||||
line->emeta_ssec = pblk_line_emeta_start(pblk, line);
|
||||
line->emeta = emeta;
|
||||
memset(line->emeta->buf, 0, lm->emeta_len[0]);
|
||||
|
||||
if (pblk_line_is_open(pblk, line)) {
|
||||
pblk_recov_l2p_from_oob(pblk, line);
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
|
||||
pblk_recov_l2p_from_oob(pblk, line);
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
|
||||
pblk_recov_l2p_from_oob(pblk, line);
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (pblk_recov_check_line_version(pblk, line->emeta->buf))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
pblk_recov_wa_counters(pblk, line->emeta->buf);
|
||||
|
||||
if (pblk_recov_l2p_from_emeta(pblk, line))
|
||||
pblk_recov_l2p_from_oob(pblk, line);
|
||||
|
||||
next:
|
||||
if (pblk_line_is_full(line)) {
|
||||
struct list_head *move_list;
|
||||
|
||||
spin_lock(&line->lock);
|
||||
line->state = PBLK_LINESTATE_CLOSED;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
move_list = pblk_line_gc_list(pblk, line);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
spin_lock(&l_mg->gc_lock);
|
||||
list_move_tail(&line->list, move_list);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
|
||||
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
|
||||
line->map_bitmap = NULL;
|
||||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
} else {
|
||||
spin_lock(&line->lock);
|
||||
line->state = PBLK_LINESTATE_OPEN;
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
line->emeta->mem = 0;
|
||||
atomic_set(&line->emeta->sync, 0);
|
||||
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
|
||||
data_line = line;
|
||||
line->meta_line = meta_line;
|
||||
|
||||
open_lines++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!open_lines) {
|
||||
spin_lock(&l_mg->free_lock);
|
||||
WARN_ON_ONCE(!test_and_clear_bit(meta_line,
|
||||
&l_mg->meta_bitmap));
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
} else {
|
||||
spin_lock(&l_mg->free_lock);
|
||||
l_mg->data_line = data_line;
|
||||
/* Allocate next line for preparation */
|
||||
l_mg->data_next = pblk_line_get(pblk);
|
||||
if (l_mg->data_next) {
|
||||
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
|
||||
l_mg->data_next->type = PBLK_LINETYPE_DATA;
|
||||
is_next = 1;
|
||||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
}
|
||||
|
||||
if (is_next)
|
||||
pblk_line_erase(pblk, l_mg->data_next);
|
||||
|
||||
out:
|
||||
if (found_lines != recovered_lines)
|
||||
pblk_err(pblk, "failed to recover all found lines %d/%d\n",
|
||||
found_lines, recovered_lines);
|
||||
|
||||
return data_line;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pad current line
|
||||
*/
|
||||
int pblk_recov_pad(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line *line;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
int left_msecs;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
line = l_mg->data_line;
|
||||
left_msecs = line->left_msecs;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
ret = pblk_recov_pad_line(pblk, line, left_msecs);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "tear down padding failed (%d)\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
pblk_line_close_meta(pblk, line);
|
||||
return ret;
|
||||
}
|
@@ -1,254 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-rl.c - pblk's rate limiter for user I/O
|
||||
*
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
{
	mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}

int pblk_rl_is_limit(struct pblk_rl *rl)
{
	int rb_space;

	rb_space = atomic_read(&rl->rb_space);

	return (rb_space == 0);
}

int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
	int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
	int rb_space = atomic_read(&rl->rb_space);

	if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
		return NVM_IO_ERR;

	if (rb_user_cnt >= rl->rb_user_max)
		return NVM_IO_REQUEUE;

	return NVM_IO_OK;
}

void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
|
||||
{
|
||||
int rb_space = atomic_read(&rl->rb_space);
|
||||
|
||||
if (unlikely(rb_space >= 0))
|
||||
atomic_sub(nr_entries, &rl->rb_space);
|
||||
}
|
||||
|
||||
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
|
||||
{
|
||||
int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
|
||||
int rb_user_active;
|
||||
|
||||
/* If there is no user I/O let GC take over space on the write buffer */
|
||||
rb_user_active = READ_ONCE(rl->rb_user_active);
|
||||
return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
|
||||
}
|
||||
|
||||
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
|
||||
{
|
||||
atomic_add(nr_entries, &rl->rb_user_cnt);
|
||||
|
||||
/* Release user I/O state. Protect from GC */
|
||||
smp_store_release(&rl->rb_user_active, 1);
|
||||
pblk_rl_kick_u_timer(rl);
|
||||
}
|
||||
|
||||
void pblk_rl_werr_line_in(struct pblk_rl *rl)
|
||||
{
|
||||
atomic_inc(&rl->werr_lines);
|
||||
}
|
||||
|
||||
void pblk_rl_werr_line_out(struct pblk_rl *rl)
|
||||
{
|
||||
atomic_dec(&rl->werr_lines);
|
||||
}
|
||||
|
||||
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
|
||||
{
|
||||
atomic_add(nr_entries, &rl->rb_gc_cnt);
|
||||
}
|
||||
|
||||
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
|
||||
{
|
||||
atomic_sub(nr_user, &rl->rb_user_cnt);
|
||||
atomic_sub(nr_gc, &rl->rb_gc_cnt);
|
||||
}
|
||||
|
||||
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
|
||||
{
|
||||
return atomic_read(&rl->free_blocks);
|
||||
}
|
||||
|
||||
unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
|
||||
{
|
||||
return atomic_read(&rl->free_user_blocks);
|
||||
}
|
||||
|
||||
static void __pblk_rl_update_rates(struct pblk_rl *rl,
				   unsigned long free_blocks)
{
	struct pblk *pblk = container_of(rl, struct pblk, rl);
	int max = rl->rb_budget;
	int werr_gc_needed = atomic_read(&rl->werr_lines);

	if (free_blocks >= rl->high) {
		if (werr_gc_needed) {
			/* Allocate a small budget for recovering
			 * lines with write errors
			 */
			rl->rb_gc_max = 1 << rl->rb_windows_pw;
			rl->rb_user_max = max - rl->rb_gc_max;
			rl->rb_state = PBLK_RL_WERR;
		} else {
			rl->rb_user_max = max;
			rl->rb_gc_max = 0;
			rl->rb_state = PBLK_RL_OFF;
		}
	} else if (free_blocks < rl->high) {
		int shift = rl->high_pw - rl->rb_windows_pw;
		int user_windows = free_blocks >> shift;
		int user_max = user_windows << ilog2(NVM_MAX_VLBA);

		rl->rb_user_max = user_max;
		rl->rb_gc_max = max - user_max;

		if (free_blocks <= rl->rsv_blocks) {
			rl->rb_user_max = 0;
			rl->rb_gc_max = max;
		}

		/* In the worst case, we will need to GC lines in the low list
		 * (high valid sector count). If there are lines to GC on high
		 * or mid lists, these will be prioritized
		 */
		rl->rb_state = PBLK_RL_LOW;
	}

	if (rl->rb_state != PBLK_RL_OFF)
		pblk_gc_should_start(pblk);
	else
		pblk_gc_should_stop(pblk);
}

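The split in the free_blocks < rl->high branch is power-of-two arithmetic: the free-block count is shifted down into a number of write-buffer windows, each window is worth NVM_MAX_VLBA buffer entries of user budget, and whatever is left of the total budget goes to GC. A stand-alone sketch of the same calculation follows; the budget, watermark and free-block figures are made up for illustration, count_order() is a hand-rolled stand-in for the kernel's get_count_order()/ilog2(), and NVM_MAX_VLBA is assumed to be 64 as defined in lightnvm.h:

#include <stdio.h>

/* ceil(log2(n)), standing in for the kernel's get_count_order() */
static int count_order(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	const int NVM_MAX_VLBA = 64;	/* vector size assumed from lightnvm.h */
	int budget = 4096;		/* illustrative write-buffer budget */
	int high = 1000;		/* illustrative high watermark (blocks) */
	int free_blocks = 600;		/* illustrative current free blocks */

	int rb_windows_pw = count_order(budget / NVM_MAX_VLBA);
	int high_pw = count_order(high);

	/* same split as __pblk_rl_update_rates() when free_blocks < high */
	int shift = high_pw - rb_windows_pw;
	int user_windows = free_blocks >> shift;
	int user_max = user_windows << count_order(NVM_MAX_VLBA);
	int gc_max = budget - user_max;

	printf("user budget: %d entries, gc budget: %d entries\n",
	       user_max, gc_max);
	return 0;
}

With these numbers, 600 free blocks against a high watermark of 1000 leaves 2368 of the 4096 buffer entries for user writes and 1728 for GC; as free_blocks shrinks the user share drops in steps of one window (64 entries) until the reserved-block floor hands the whole budget to GC.
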
void pblk_rl_update_rates(struct pblk_rl *rl)
|
||||
{
|
||||
__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
|
||||
}
|
||||
|
||||
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
|
||||
{
|
||||
int blk_in_line = atomic_read(&line->blk_in_line);
|
||||
int free_blocks;
|
||||
|
||||
atomic_add(blk_in_line, &rl->free_blocks);
|
||||
free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
|
||||
|
||||
__pblk_rl_update_rates(rl, free_blocks);
|
||||
}
|
||||
|
||||
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
|
||||
bool used)
|
||||
{
|
||||
int blk_in_line = atomic_read(&line->blk_in_line);
|
||||
int free_blocks;
|
||||
|
||||
atomic_sub(blk_in_line, &rl->free_blocks);
|
||||
|
||||
if (used)
|
||||
free_blocks = atomic_sub_return(blk_in_line,
|
||||
&rl->free_user_blocks);
|
||||
else
|
||||
free_blocks = atomic_read(&rl->free_user_blocks);
|
||||
|
||||
__pblk_rl_update_rates(rl, free_blocks);
|
||||
}
|
||||
|
||||
int pblk_rl_high_thrs(struct pblk_rl *rl)
|
||||
{
|
||||
return rl->high;
|
||||
}
|
||||
|
||||
int pblk_rl_max_io(struct pblk_rl *rl)
|
||||
{
|
||||
return rl->rb_max_io;
|
||||
}
|
||||
|
||||
static void pblk_rl_u_timer(struct timer_list *t)
|
||||
{
|
||||
struct pblk_rl *rl = from_timer(rl, t, u_timer);
|
||||
|
||||
/* Release user I/O state. Protect from GC */
|
||||
smp_store_release(&rl->rb_user_active, 0);
|
||||
}
|
||||
|
||||
void pblk_rl_free(struct pblk_rl *rl)
|
||||
{
|
||||
del_timer(&rl->u_timer);
|
||||
}
|
||||
|
||||
void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
|
||||
{
|
||||
struct pblk *pblk = container_of(rl, struct pblk, rl);
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int sec_meta, blk_meta;
|
||||
unsigned int rb_windows;
|
||||
|
||||
/* Consider sectors used for metadata */
|
||||
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
|
||||
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
|
||||
|
||||
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
|
||||
rl->high_pw = get_count_order(rl->high);
|
||||
|
||||
rl->rsv_blocks = pblk_get_min_chks(pblk);
|
||||
|
||||
/* This will always be a power-of-2 */
|
||||
rb_windows = budget / NVM_MAX_VLBA;
|
||||
rl->rb_windows_pw = get_count_order(rb_windows);
|
||||
|
||||
/* To start with, all buffer is available to user I/O writers */
|
||||
rl->rb_budget = budget;
|
||||
rl->rb_user_max = budget;
|
||||
rl->rb_gc_max = 0;
|
||||
rl->rb_state = PBLK_RL_HIGH;
|
||||
|
||||
/* Maximize I/O size and ensure that the back threshold is respected */
|
||||
if (threshold)
|
||||
rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
|
||||
else
|
||||
rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
|
||||
|
||||
atomic_set(&rl->rb_user_cnt, 0);
|
||||
atomic_set(&rl->rb_gc_cnt, 0);
|
||||
atomic_set(&rl->rb_space, -1);
|
||||
atomic_set(&rl->werr_lines, 0);
|
||||
|
||||
timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
|
||||
|
||||
rl->rb_user_active = 0;
|
||||
rl->rb_gc_active = 0;
|
||||
}
|
@@ -1,728 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Implementation of a physical block-device target for Open-channel SSDs.
|
||||
*
|
||||
* pblk-sysfs.c - pblk's sysfs
|
||||
*
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
|
||||
static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_lun *rlun;
|
||||
ssize_t sz = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < geo->all_luns; i++) {
|
||||
int active = 1;
|
||||
|
||||
rlun = &pblk->luns[i];
|
||||
if (!down_trylock(&rlun->wr_sem)) {
|
||||
active = 0;
|
||||
up(&rlun->wr_sem);
|
||||
}
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
|
||||
i,
|
||||
rlun->bppa.a.ch,
|
||||
rlun->bppa.a.lun,
|
||||
active);
|
||||
}
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
|
||||
{
|
||||
int free_blocks, free_user_blocks, total_blocks;
|
||||
int rb_user_max, rb_user_cnt;
|
||||
int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
|
||||
|
||||
free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
|
||||
free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
|
||||
rb_user_max = pblk->rl.rb_user_max;
|
||||
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
|
||||
rb_gc_max = pblk->rl.rb_gc_max;
|
||||
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
|
||||
rb_budget = pblk->rl.rb_budget;
|
||||
rb_state = pblk->rl.rb_state;
|
||||
|
||||
total_blocks = pblk->rl.total_blocks;
|
||||
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
|
||||
rb_user_cnt,
|
||||
rb_user_max,
|
||||
rb_gc_cnt,
|
||||
rb_gc_max,
|
||||
rb_state,
|
||||
rb_budget,
|
||||
pblk->rl.high,
|
||||
free_blocks,
|
||||
free_user_blocks,
|
||||
total_blocks,
|
||||
READ_ONCE(pblk->rl.rb_user_active));
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
|
||||
{
|
||||
int gc_enabled, gc_active;
|
||||
|
||||
pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);
|
||||
return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
|
||||
gc_enabled, gc_active);
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
|
||||
{
|
||||
ssize_t sz;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
"read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
|
||||
atomic_long_read(&pblk->read_failed),
|
||||
atomic_long_read(&pblk->read_high_ecc),
|
||||
atomic_long_read(&pblk->read_empty),
|
||||
atomic_long_read(&pblk->read_failed_gc),
|
||||
atomic_long_read(&pblk->write_failed),
|
||||
atomic_long_read(&pblk->erase_failed));
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
|
||||
{
|
||||
return pblk_rb_sysfs(&pblk->rwb, page);
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
ssize_t sz = 0;
|
||||
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
|
||||
struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
|
||||
|
||||
sz = scnprintf(page, PAGE_SIZE,
|
||||
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->blk_offset, ppaf->blk_len,
|
||||
ppaf->pg_offset, ppaf->pg_len,
|
||||
ppaf->lun_offset, ppaf->lun_len,
|
||||
ppaf->ch_offset, ppaf->ch_len,
|
||||
ppaf->pln_offset, ppaf->pln_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
gppaf->blk_offset, gppaf->blk_len,
|
||||
gppaf->pg_offset, gppaf->pg_len,
|
||||
gppaf->lun_offset, gppaf->lun_len,
|
||||
gppaf->ch_offset, gppaf->ch_len,
|
||||
gppaf->pln_offset, gppaf->pln_len,
|
||||
gppaf->sec_offset, gppaf->sec_len);
|
||||
} else {
|
||||
struct nvm_addrf *ppaf = &pblk->addrf;
|
||||
struct nvm_addrf *gppaf = &geo->addrf;
|
||||
|
||||
sz = scnprintf(page, PAGE_SIZE,
|
||||
"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->ch_offset, ppaf->ch_len,
|
||||
ppaf->lun_offset, ppaf->lun_len,
|
||||
ppaf->chk_offset, ppaf->chk_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
|
||||
gppaf->ch_offset, gppaf->ch_len,
|
||||
gppaf->lun_offset, gppaf->lun_len,
|
||||
gppaf->chk_offset, gppaf->chk_len,
|
||||
gppaf->sec_offset, gppaf->sec_len);
|
||||
}
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line *line;
|
||||
ssize_t sz = 0;
|
||||
int nr_free_lines;
|
||||
int cur_data, cur_log;
|
||||
int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
|
||||
int d_line_cnt = 0, l_line_cnt = 0;
|
||||
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
|
||||
int gc_werr = 0;
|
||||
|
||||
int bad = 0, cor = 0;
|
||||
int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
|
||||
int map_weight = 0, meta_weight = 0;
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
|
||||
cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
|
||||
nr_free_lines = l_mg->nr_free_lines;
|
||||
|
||||
list_for_each_entry(line, &l_mg->free_list, list)
|
||||
free_line_cnt++;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
spin_lock(&l_mg->close_lock);
|
||||
list_for_each_entry(line, &l_mg->emeta_list, list)
|
||||
emeta_line_cnt++;
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
|
||||
spin_lock(&l_mg->gc_lock);
|
||||
list_for_each_entry(line, &l_mg->gc_full_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_full++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_high_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_high++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_mid_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_mid++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_low_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_low++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_empty_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_empty++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_werr_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_werr++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->bad_list, list)
|
||||
bad++;
|
||||
list_for_each_entry(line, &l_mg->corrupt_list, list)
|
||||
cor++;
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
if (l_mg->data_line) {
|
||||
cur_sec = l_mg->data_line->cur_sec;
|
||||
msecs = l_mg->data_line->left_msecs;
|
||||
vsc = le32_to_cpu(*l_mg->data_line->vsc);
|
||||
sec_in_line = l_mg->data_line->sec_in_line;
|
||||
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
|
||||
PBLK_DATA_LINES);
|
||||
|
||||
spin_lock(&l_mg->data_line->lock);
|
||||
if (l_mg->data_line->map_bitmap)
|
||||
map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
|
||||
lm->sec_per_line);
|
||||
else
|
||||
map_weight = 0;
|
||||
spin_unlock(&l_mg->data_line->lock);
|
||||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
if (nr_free_lines != free_line_cnt)
|
||||
pblk_err(pblk, "corrupted free line list:%d/%d\n",
|
||||
nr_free_lines, free_line_cnt);
|
||||
|
||||
sz = scnprintf(page, PAGE_SIZE - sz,
|
||||
"line: nluns:%d, nblks:%d, nsecs:%d\n",
|
||||
geo->all_luns, lm->blk_per_line, lm->sec_per_line);
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
|
||||
cur_data, cur_log,
|
||||
nr_free_lines,
|
||||
emeta_line_cnt, meta_weight,
|
||||
closed_line_cnt,
|
||||
bad, cor,
|
||||
d_line_cnt, l_line_cnt,
|
||||
l_mg->nr_lines);
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
|
||||
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
|
||||
atomic_read(&pblk->gc.read_inflight_gc));
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
|
||||
cur_data, cur_sec, msecs, vsc, sec_in_line,
|
||||
map_weight, lm->sec_per_line,
|
||||
atomic_read(&pblk->inflight_io));
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
ssize_t sz = 0;
|
||||
|
||||
sz = scnprintf(page, PAGE_SIZE - sz,
|
||||
"smeta - len:%d, secs:%d\n",
|
||||
lm->smeta_len, lm->smeta_sec);
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"emeta - len:%d, sec:%d, bb_start:%d\n",
|
||||
lm->emeta_len[0], lm->emeta_sec[0],
|
||||
lm->emeta_bb);
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
|
||||
lm->sec_bitmap_len,
|
||||
lm->blk_bitmap_len,
|
||||
lm->lun_bitmap_len);
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
|
||||
lm->blk_per_line,
|
||||
lm->sec_per_line,
|
||||
geo->clba);
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
|
||||
}
|
||||
|
||||
static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
				  char *page)
{
	int sz;

	sz = scnprintf(page, PAGE_SIZE,
			"user:%lld gc:%lld pad:%lld WA:",
			user, gc, pad);

	if (!user) {
		sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
	} else {
		u64 wa_int;
		u32 wa_frac;

		wa_int = (user + gc + pad) * 100000;
		wa_int = div64_u64(wa_int, user);
		wa_int = div_u64_rem(wa_int, 100000, &wa_frac);

		sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
				wa_int, wa_frac);
	}

	return sz;
}

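The write-amplification figure printed above is computed in fixed point with five fractional digits: the sector counters are scaled by 100000 before the division so that no floating point is needed in the kernel. A small user-space sketch of the same arithmetic, with made-up sector counts purely for illustration:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	/* illustrative sector counters; in pblk these come from
	 * pblk->user_wa, pblk->gc_wa and pblk->pad_wa */
	uint64_t user = 1000000, gc = 150000, pad = 2500;

	/* same fixed-point trick as pblk_get_write_amp(): scale by 10^5,
	 * divide once, then split into integer and fractional parts */
	uint64_t scaled = (user + gc + pad) * 100000 / user;
	uint64_t wa_int = scaled / 100000;
	uint32_t wa_frac = (uint32_t)(scaled % 100000);

	printf("WA: %" PRIu64 ".%05u\n", wa_int, wa_frac);
	return 0;
}

With those counters the sysfs file would report WA: 1.15250, i.e. roughly 1.15 physical sectors written per user sector.
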
static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
|
||||
{
|
||||
return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
|
||||
atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
|
||||
page);
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
|
||||
{
|
||||
return pblk_get_write_amp(
|
||||
atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
|
||||
atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
|
||||
atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
|
||||
}
|
||||
|
||||
static long long bucket_percentage(unsigned long long bucket,
				   unsigned long long total)
{
	int p = bucket * 100;

	p = div_u64(p, total);

	return p;
}

static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
|
||||
{
|
||||
int sz = 0;
|
||||
unsigned long long total;
|
||||
unsigned long long total_buckets = 0;
|
||||
int buckets = pblk->min_write_pgs - 1;
|
||||
int i;
|
||||
|
||||
total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
|
||||
if (!total) {
|
||||
for (i = 0; i < (buckets + 1); i++)
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"%d:0 ", i);
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
for (i = 0; i < buckets; i++)
|
||||
total_buckets += atomic64_read(&pblk->pad_dist[i]);
|
||||
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
|
||||
bucket_percentage(total - total_buckets, total));
|
||||
|
||||
for (i = 0; i < buckets; i++) {
|
||||
unsigned long long p;
|
||||
|
||||
p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
|
||||
total);
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
|
||||
i + 1, p);
|
||||
}
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
|
||||
atomic_long_read(&pblk->inflight_writes),
|
||||
atomic_long_read(&pblk->inflight_reads),
|
||||
atomic_long_read(&pblk->req_writes),
|
||||
(u64)atomic64_read(&pblk->nr_flush),
|
||||
atomic_long_read(&pblk->padded_writes),
|
||||
atomic_long_read(&pblk->padded_wb),
|
||||
atomic_long_read(&pblk->sub_writes),
|
||||
atomic_long_read(&pblk->sync_writes),
|
||||
atomic_long_read(&pblk->recov_writes),
|
||||
atomic_long_read(&pblk->recov_gc_writes),
|
||||
atomic_long_read(&pblk->recov_gc_reads),
|
||||
atomic_long_read(&pblk->cache_reads),
|
||||
atomic_long_read(&pblk->sync_reads));
|
||||
}
|
||||
#endif
|
||||
|
||||
static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
|
||||
size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int force;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &force))
|
||||
return -EINVAL;
|
||||
|
||||
pblk_gc_sysfs_force(pblk, force);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
|
||||
const char *page, size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int sec_per_write;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &sec_per_write))
|
||||
return -EINVAL;
|
||||
|
||||
if (!pblk_is_oob_meta_supported(pblk)) {
|
||||
/* For packed metadata case it is
|
||||
* not allowed to change sec_per_write.
|
||||
*/
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (sec_per_write < pblk->min_write_pgs
|
||||
|| sec_per_write > pblk->max_write_pgs
|
||||
|| sec_per_write % pblk->min_write_pgs != 0)
|
||||
return -EINVAL;
|
||||
|
||||
pblk_set_sec_per_write(pblk, sec_per_write);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
|
||||
const char *page, size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int reset_value;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &reset_value))
|
||||
return -EINVAL;
|
||||
|
||||
if (reset_value != 0)
|
||||
return -EINVAL;
|
||||
|
||||
pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
|
||||
pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
|
||||
pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
|
||||
const char *page, size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int reset_value;
|
||||
int buckets = pblk->min_write_pgs - 1;
|
||||
int i;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &reset_value))
|
||||
return -EINVAL;
|
||||
|
||||
if (reset_value != 0)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < buckets; i++)
|
||||
atomic64_set(&pblk->pad_dist[i], 0);
|
||||
|
||||
pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct attribute sys_write_luns = {
|
||||
.name = "write_luns",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_rate_limiter_attr = {
|
||||
.name = "rate_limiter",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_gc_state = {
|
||||
.name = "gc_state",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_errors_attr = {
|
||||
.name = "errors",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_rb_attr = {
|
||||
.name = "write_buffer",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_stats_ppaf_attr = {
|
||||
.name = "ppa_format",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_lines_attr = {
|
||||
.name = "lines",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_lines_info_attr = {
|
||||
.name = "lines_info",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_gc_force = {
|
||||
.name = "gc_force",
|
||||
.mode = 0200,
|
||||
};
|
||||
|
||||
static struct attribute sys_max_sec_per_write = {
|
||||
.name = "max_sec_per_write",
|
||||
.mode = 0644,
|
||||
};
|
||||
|
||||
static struct attribute sys_write_amp_mileage = {
|
||||
.name = "write_amp_mileage",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_write_amp_trip = {
|
||||
.name = "write_amp_trip",
|
||||
.mode = 0644,
|
||||
};
|
||||
|
||||
static struct attribute sys_padding_dist = {
|
||||
.name = "padding_dist",
|
||||
.mode = 0644,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
static struct attribute sys_stats_debug_attr = {
|
||||
.name = "stats",
|
||||
.mode = 0444,
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct attribute *pblk_attrs[] = {
|
||||
&sys_write_luns,
|
||||
&sys_rate_limiter_attr,
|
||||
&sys_errors_attr,
|
||||
&sys_gc_state,
|
||||
&sys_gc_force,
|
||||
&sys_max_sec_per_write,
|
||||
&sys_rb_attr,
|
||||
&sys_stats_ppaf_attr,
|
||||
&sys_lines_attr,
|
||||
&sys_lines_info_attr,
|
||||
&sys_write_amp_mileage,
|
||||
&sys_write_amp_trip,
|
||||
&sys_padding_dist,
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
&sys_stats_debug_attr,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
|
||||
|
||||
if (strcmp(attr->name, "rate_limiter") == 0)
|
||||
return pblk_sysfs_rate_limiter(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_luns") == 0)
|
||||
return pblk_sysfs_luns_show(pblk, buf);
|
||||
else if (strcmp(attr->name, "gc_state") == 0)
|
||||
return pblk_sysfs_gc_state_show(pblk, buf);
|
||||
else if (strcmp(attr->name, "errors") == 0)
|
||||
return pblk_sysfs_stats(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_buffer") == 0)
|
||||
return pblk_sysfs_write_buffer(pblk, buf);
|
||||
else if (strcmp(attr->name, "ppa_format") == 0)
|
||||
return pblk_sysfs_ppaf(pblk, buf);
|
||||
else if (strcmp(attr->name, "lines") == 0)
|
||||
return pblk_sysfs_lines(pblk, buf);
|
||||
else if (strcmp(attr->name, "lines_info") == 0)
|
||||
return pblk_sysfs_lines_info(pblk, buf);
|
||||
else if (strcmp(attr->name, "max_sec_per_write") == 0)
|
||||
return pblk_sysfs_get_sec_per_write(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_amp_mileage") == 0)
|
||||
return pblk_sysfs_get_write_amp_mileage(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_amp_trip") == 0)
|
||||
return pblk_sysfs_get_write_amp_trip(pblk, buf);
|
||||
else if (strcmp(attr->name, "padding_dist") == 0)
|
||||
return pblk_sysfs_get_padding_dist(pblk, buf);
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
else if (strcmp(attr->name, "stats") == 0)
|
||||
return pblk_sysfs_stats_debug(pblk, buf);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
|
||||
|
||||
if (strcmp(attr->name, "gc_force") == 0)
|
||||
return pblk_sysfs_gc_force(pblk, buf, len);
|
||||
else if (strcmp(attr->name, "max_sec_per_write") == 0)
|
||||
return pblk_sysfs_set_sec_per_write(pblk, buf, len);
|
||||
else if (strcmp(attr->name, "write_amp_trip") == 0)
|
||||
return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
|
||||
else if (strcmp(attr->name, "padding_dist") == 0)
|
||||
return pblk_sysfs_set_padding_dist(pblk, buf, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct sysfs_ops pblk_sysfs_ops = {
|
||||
.show = pblk_sysfs_show,
|
||||
.store = pblk_sysfs_store,
|
||||
};
|
||||
|
||||
static struct kobj_type pblk_ktype = {
|
||||
.sysfs_ops = &pblk_sysfs_ops,
|
||||
.default_attrs = pblk_attrs,
|
||||
};
|
||||
|
||||
int pblk_sysfs_init(struct gendisk *tdisk)
|
||||
{
|
||||
struct pblk *pblk = tdisk->private_data;
|
||||
struct device *parent_dev = disk_to_dev(pblk->disk);
|
||||
int ret;
|
||||
|
||||
ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
|
||||
kobject_get(&parent_dev->kobj),
|
||||
"%s", "pblk");
|
||||
if (ret) {
|
||||
pblk_err(pblk, "could not register\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
kobject_uevent(&pblk->kobj, KOBJ_ADD);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pblk_sysfs_exit(struct gendisk *tdisk)
|
||||
{
|
||||
struct pblk *pblk = tdisk->private_data;
|
||||
|
||||
kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
|
||||
kobject_del(&pblk->kobj);
|
||||
kobject_put(&pblk->kobj);
|
||||
}
|
@@ -1,145 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM pblk
|
||||
|
||||
#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_PBLK_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
struct ppa_addr;
|
||||
|
||||
#define show_chunk_flags(state) __print_flags(state, "", \
|
||||
{ NVM_CHK_ST_FREE, "FREE", }, \
|
||||
{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
|
||||
{ NVM_CHK_ST_OPEN, "OPEN", }, \
|
||||
{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
|
||||
|
||||
#define show_line_state(state) __print_symbolic(state, \
|
||||
{ PBLK_LINESTATE_NEW, "NEW", }, \
|
||||
{ PBLK_LINESTATE_FREE, "FREE", }, \
|
||||
{ PBLK_LINESTATE_OPEN, "OPEN", }, \
|
||||
{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
|
||||
{ PBLK_LINESTATE_GC, "GC", }, \
|
||||
{ PBLK_LINESTATE_BAD, "BAD", }, \
|
||||
{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
|
||||
|
||||
|
||||
#define show_pblk_state(state) __print_symbolic(state, \
|
||||
{ PBLK_STATE_RUNNING, "RUNNING", }, \
|
||||
{ PBLK_STATE_STOPPING, "STOPPING", }, \
|
||||
{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
|
||||
{ PBLK_STATE_STOPPED, "STOPPED" })
|
||||
|
||||
#define show_chunk_erase_state(state) __print_symbolic(state, \
|
||||
{ PBLK_CHUNK_RESET_START, "START", }, \
|
||||
{ PBLK_CHUNK_RESET_DONE, "OK", }, \
|
||||
{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
|
||||
|
||||
|
||||
TRACE_EVENT(pblk_chunk_reset,
|
||||
|
||||
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
|
||||
|
||||
TP_ARGS(name, ppa, state),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(u64, ppa)
|
||||
__field(int, state)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->ppa = ppa->ppa;
|
||||
__entry->state = state;
|
||||
),
|
||||
|
||||
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
|
||||
show_chunk_erase_state((int)__entry->state))
|
||||
|
||||
);
|
||||
|
||||
TRACE_EVENT(pblk_chunk_state,
|
||||
|
||||
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
|
||||
|
||||
TP_ARGS(name, ppa, state),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(u64, ppa)
|
||||
__field(int, state)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->ppa = ppa->ppa;
|
||||
__entry->state = state;
|
||||
),
|
||||
|
||||
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
|
||||
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
|
||||
show_chunk_flags((int)__entry->state))
|
||||
|
||||
);
|
||||
|
||||
TRACE_EVENT(pblk_line_state,
|
||||
|
||||
TP_PROTO(const char *name, int line, int state),
|
||||
|
||||
TP_ARGS(name, line, state),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(int, line)
|
||||
__field(int, state)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->line = line;
|
||||
__entry->state = state;
|
||||
),
|
||||
|
||||
TP_printk("dev=%s line=%d state=%s", __get_str(name),
|
||||
(int)__entry->line,
|
||||
show_line_state((int)__entry->state))
|
||||
|
||||
);
|
||||
|
||||
TRACE_EVENT(pblk_state,
|
||||
|
||||
TP_PROTO(const char *name, int state),
|
||||
|
||||
TP_ARGS(name, state),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(int, state)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->state = state;
|
||||
),
|
||||
|
||||
TP_printk("dev=%s state=%s", __get_str(name),
|
||||
show_pblk_state((int)__entry->state))
|
||||
|
||||
);
|
||||
|
||||
#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE pblk-trace
|
||||
#include <trace/define_trace.h>
|
@@ -1,665 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
* Matias Bjorling <matias@cnexlabs.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* pblk-write.c - pblk's write path from write buffer to media
|
||||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
|
||||
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
struct bio *original_bio;
|
||||
struct pblk_rb *rwb = &pblk->rwb;
|
||||
unsigned long ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < c_ctx->nr_valid; i++) {
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
int pos = c_ctx->sentry + i;
|
||||
int flags;
|
||||
|
||||
w_ctx = pblk_rb_w_ctx(rwb, pos);
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
|
||||
if (flags & PBLK_FLUSH_ENTRY) {
|
||||
flags &= ~PBLK_FLUSH_ENTRY;
|
||||
/* Release flags on context. Protect from writes */
|
||||
smp_store_release(&w_ctx->flags, flags);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_dec(&rwb->inflight_flush_point);
|
||||
#endif
|
||||
}
|
||||
|
||||
while ((original_bio = bio_list_pop(&w_ctx->bios)))
|
||||
bio_endio(original_bio);
|
||||
}
|
||||
|
||||
if (c_ctx->nr_padded)
|
||||
pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
|
||||
c_ctx->nr_padded);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(rqd->nr_ppas, &pblk->sync_writes);
|
||||
#endif
|
||||
|
||||
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
|
||||
|
||||
bio_put(rqd->bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
|
||||
struct nvm_rq *rqd,
|
||||
struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
list_del(&c_ctx->list);
|
||||
return pblk_end_w_bio(pblk, rqd, c_ctx);
|
||||
}
|
||||
|
||||
static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
struct pblk_c_ctx *c, *r;
|
||||
unsigned long flags;
|
||||
unsigned long pos;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
|
||||
#endif
|
||||
pblk_up_rq(pblk, c_ctx->lun_bitmap);
|
||||
|
||||
pos = pblk_rb_sync_init(&pblk->rwb, &flags);
|
||||
if (pos == c_ctx->sentry) {
|
||||
pos = pblk_end_w_bio(pblk, rqd, c_ctx);
|
||||
|
||||
retry:
|
||||
list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
|
||||
rqd = nvm_rq_from_c_ctx(c);
|
||||
if (c->sentry == pos) {
|
||||
pos = pblk_end_queued_w_bio(pblk, rqd, c);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
|
||||
list_add_tail(&c_ctx->list, &pblk->compl_list);
|
||||
}
|
||||
pblk_rb_sync_end(&pblk->rwb, &flags);
|
||||
}
|
||||
|
||||
/* Map remaining sectors in chunk, starting from ppa */
|
||||
static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa,
|
||||
int rqd_ppas)
|
||||
{
|
||||
struct pblk_line *line;
|
||||
struct ppa_addr map_ppa = *ppa;
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
__le64 *lba_list;
|
||||
u64 paddr;
|
||||
int done = 0;
|
||||
int n = 0;
|
||||
|
||||
line = pblk_ppa_to_line(pblk, *ppa);
|
||||
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
|
||||
|
||||
spin_lock(&line->lock);
|
||||
|
||||
while (!done) {
|
||||
paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
|
||||
|
||||
if (!test_and_set_bit(paddr, line->map_bitmap))
|
||||
line->left_msecs--;
|
||||
|
||||
if (n < rqd_ppas && lba_list[paddr] != addr_empty)
|
||||
line->nr_valid_lbas--;
|
||||
|
||||
lba_list[paddr] = addr_empty;
|
||||
|
||||
if (!test_and_set_bit(paddr, line->invalid_bitmap))
|
||||
le32_add_cpu(line->vsc, -1);
|
||||
|
||||
done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
line->w_err_gc->has_write_err = 1;
|
||||
spin_unlock(&line->lock);
|
||||
}
|
||||
|
||||
static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
|
||||
unsigned int nr_entries)
|
||||
{
|
||||
struct pblk_rb *rb = &pblk->rwb;
|
||||
struct pblk_rb_entry *entry;
|
||||
struct pblk_line *line;
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
struct ppa_addr ppa_l2p;
|
||||
int flags;
|
||||
unsigned int i;
|
||||
|
||||
spin_lock(&pblk->trans_lock);
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)];
|
||||
w_ctx = &entry->w_ctx;
|
||||
|
||||
/* Check if the lba has been overwritten */
|
||||
if (w_ctx->lba != ADDR_EMPTY) {
|
||||
ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
|
||||
if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
|
||||
w_ctx->lba = ADDR_EMPTY;
|
||||
}
|
||||
|
||||
/* Mark up the entry as submittable again */
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
flags |= PBLK_WRITTEN_DATA;
|
||||
/* Release flags on write context. Protect from writes */
|
||||
smp_store_release(&w_ctx->flags, flags);
|
||||
|
||||
/* Decrease the reference count to the line as we will
|
||||
* re-map these entries
|
||||
*/
|
||||
line = pblk_ppa_to_line(pblk, w_ctx->ppa);
|
||||
atomic_dec(&line->sec_to_update);
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
}
|
||||
spin_unlock(&pblk->trans_lock);
|
||||
}
|
||||
|
||||
static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
struct pblk_c_ctx *r_ctx;
|
||||
|
||||
r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
|
||||
if (!r_ctx)
|
||||
return;
|
||||
|
||||
r_ctx->lun_bitmap = NULL;
|
||||
r_ctx->sentry = c_ctx->sentry;
|
||||
	r_ctx->nr_valid = c_ctx->nr_valid;
	r_ctx->nr_padded = c_ctx->nr_padded;

	spin_lock(&pblk->resubmit_lock);
	list_add_tail(&r_ctx->list, &pblk->resubmit_list);
	spin_unlock(&pblk->resubmit_lock);

#ifdef CONFIG_NVM_PBLK_DEBUG
	atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
#endif
}

static void pblk_submit_rec(struct work_struct *work)
{
	struct pblk_rec_ctx *recovery =
			container_of(work, struct pblk_rec_ctx, ws_rec);
	struct pblk *pblk = recovery->pblk;
	struct nvm_rq *rqd = recovery->rqd;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);

	pblk_log_write_err(pblk, rqd);

	pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas);
	pblk_queue_resubmit(pblk, c_ctx);

	pblk_up_rq(pblk, c_ctx->lun_bitmap);
	if (c_ctx->nr_padded)
		pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
							c_ctx->nr_padded);
	bio_put(rqd->bio);
	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
	mempool_free(recovery, &pblk->rec_pool);

	atomic_dec(&pblk->inflight_io);
	pblk_write_kick(pblk);
}


static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct pblk_rec_ctx *recovery;

	recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
	if (!recovery) {
		pblk_err(pblk, "could not allocate recovery work\n");
		return;
	}

	recovery->pblk = pblk;
	recovery->rqd = rqd;

	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
	queue_work(pblk->close_wq, &recovery->ws_rec);
}

static void pblk_end_io_write(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);

	if (rqd->error) {
		pblk_end_w_fail(pblk, rqd);
		return;
	} else {
		if (trace_pblk_chunk_state_enabled())
			pblk_check_chunk_state_update(pblk, rqd);
#ifdef CONFIG_NVM_PBLK_DEBUG
		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
	}

	pblk_complete_write(pblk, rqd, c_ctx);
	atomic_dec(&pblk->inflight_io);
}

static void pblk_end_io_write_meta(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;
	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
	struct pblk_line *line = m_ctx->private;
	struct pblk_emeta *emeta = line->emeta;
	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
	int sync;

	pblk_up_chunk(pblk, ppa_list[0]);

	if (rqd->error) {
		pblk_log_write_err(pblk, rqd);
		pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id);
		line->w_err_gc->has_write_err = 1;
	} else {
		if (trace_pblk_chunk_state_enabled())
			pblk_check_chunk_state_update(pblk, rqd);
	}

	sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
	if (sync == emeta->nr_entries)
		pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws,
						GFP_ATOMIC, pblk->close_wq);

	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);

	atomic_dec(&pblk->inflight_io);
}

static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   unsigned int nr_secs, nvm_end_io_fn(*end_io))
{
	/* Setup write request */
	rqd->opcode = NVM_OP_PWRITE;
	rqd->nr_ppas = nr_secs;
	rqd->is_seq = 1;
	rqd->private = pblk;
	rqd->end_io = end_io;

	return pblk_alloc_rqd_meta(pblk, rqd);
}

static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   struct ppa_addr *erase_ppa)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *e_line = pblk_line_get_erase(pblk);
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	unsigned int valid = c_ctx->nr_valid;
	unsigned int padded = c_ctx->nr_padded;
	unsigned int nr_secs = valid + padded;
	unsigned long *lun_bitmap;
	int ret;

	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
	if (!lun_bitmap)
		return -ENOMEM;
	c_ctx->lun_bitmap = lun_bitmap;

	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
	if (ret) {
		kfree(lun_bitmap);
		return ret;
	}

	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
		ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
							valid, 0);
	else
		ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
							valid, erase_ppa);

	return ret;
}

static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
				  unsigned int secs_to_flush)
{
	int secs_to_sync;

	secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);

#ifdef CONFIG_NVM_PBLK_DEBUG
	if ((!secs_to_sync && secs_to_flush)
			|| (secs_to_sync < 0)
			|| (secs_to_sync > secs_avail && !secs_to_flush)) {
		pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n",
				secs_avail, secs_to_sync, secs_to_flush);
	}
#endif

	return secs_to_sync;
}

int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_emeta *emeta = meta_line->emeta;
|
||||
struct ppa_addr *ppa_list;
|
||||
struct pblk_g_ctx *m_ctx;
|
||||
struct nvm_rq *rqd;
|
||||
void *data;
|
||||
u64 paddr;
|
||||
int rq_ppas = pblk->min_write_pgs;
|
||||
int id = meta_line->id;
|
||||
int rq_len;
|
||||
int i, j;
|
||||
int ret;
|
||||
|
||||
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
|
||||
|
||||
m_ctx = nvm_rq_to_pdu(rqd);
|
||||
m_ctx->private = meta_line;
|
||||
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
data = ((void *)emeta->buf) + emeta->mem;
|
||||
|
||||
ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
|
||||
if (ret)
|
||||
goto fail_free_rqd;
|
||||
|
||||
ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
for (i = 0; i < rqd->nr_ppas; ) {
|
||||
spin_lock(&meta_line->lock);
|
||||
paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
|
||||
spin_unlock(&meta_line->lock);
|
||||
for (j = 0; j < rq_ppas; j++, i++, paddr++)
|
||||
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
|
||||
}
|
||||
|
||||
spin_lock(&l_mg->close_lock);
|
||||
emeta->mem += rq_len;
|
||||
if (emeta->mem >= lm->emeta_len[0])
|
||||
list_del(&meta_line->list);
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
|
||||
pblk_down_chunk(pblk, ppa_list[0]);
|
||||
|
||||
ret = pblk_submit_io(pblk, rqd, data);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
|
||||
goto fail_rollback;
|
||||
}
|
||||
|
||||
return NVM_IO_OK;
|
||||
|
||||
fail_rollback:
|
||||
pblk_up_chunk(pblk, ppa_list[0]);
|
||||
spin_lock(&l_mg->close_lock);
|
||||
pblk_dealloc_page(pblk, meta_line, rq_ppas);
|
||||
list_add(&meta_line->list, &meta_line->list);
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
fail_free_rqd:
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
|
||||
struct pblk_line *meta_line,
|
||||
struct nvm_rq *data_rqd)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
|
||||
struct pblk_line *data_line = pblk_line_get_data(pblk);
|
||||
struct ppa_addr ppa, ppa_opt;
|
||||
u64 paddr;
|
||||
int pos_opt;
|
||||
|
||||
/* Schedule a metadata I/O that is half the distance from the data I/O
|
||||
* with regards to the number of LUNs forming the pblk instance. This
|
||||
* balances LUN conflicts across every I/O.
|
||||
*
|
||||
* When the LUN configuration changes (e.g., due to GC), this distance
|
||||
* can align, which would result on metadata and data I/Os colliding. In
|
||||
* this case, modify the distance to not be optimal, but move the
|
||||
* optimal in the right direction.
|
||||
*/
|
||||
paddr = pblk_lookup_page(pblk, meta_line);
|
||||
ppa = addr_to_gen_ppa(pblk, paddr, 0);
|
||||
ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
|
||||
pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
|
||||
|
||||
if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
|
||||
test_bit(pos_opt, data_line->blk_bitmap))
|
||||
return true;
|
||||
|
||||
if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
|
||||
data_line->meta_distance--;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
|
||||
struct nvm_rq *data_rqd)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line *meta_line;
|
||||
|
||||
spin_lock(&l_mg->close_lock);
|
||||
if (list_empty(&l_mg->emeta_list)) {
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
return NULL;
|
||||
}
|
||||
meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
|
||||
if (meta_line->emeta->mem >= lm->emeta_len[0]) {
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
return NULL;
|
||||
}
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
|
||||
if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
|
||||
return NULL;
|
||||
|
||||
return meta_line;
|
||||
}
|
||||
|
||||
static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr erase_ppa;
|
||||
struct pblk_line *meta_line;
|
||||
int err;
|
||||
|
||||
pblk_ppa_set_empty(&erase_ppa);
|
||||
|
||||
/* Assign lbas to ppas and populate request structure */
|
||||
err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
|
||||
if (err) {
|
||||
pblk_err(pblk, "could not setup write request: %d\n", err);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
meta_line = pblk_should_submit_meta_io(pblk, rqd);
|
||||
|
||||
/* Submit data write for current data line */
|
||||
err = pblk_submit_io(pblk, rqd, NULL);
|
||||
if (err) {
|
||||
pblk_err(pblk, "data I/O submission failed: %d\n", err);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
if (!pblk_ppa_empty(erase_ppa)) {
|
||||
/* Submit erase for next data line */
|
||||
if (pblk_blk_erase_async(pblk, erase_ppa)) {
|
||||
struct pblk_line *e_line = pblk_line_get_erase(pblk);
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int bit;
|
||||
|
||||
atomic_inc(&e_line->left_eblks);
|
||||
bit = pblk_ppa_to_pos(geo, erase_ppa);
|
||||
WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
|
||||
}
|
||||
}
|
||||
|
||||
if (meta_line) {
|
||||
/* Submit metadata write for previous data line */
|
||||
err = pblk_submit_meta_io(pblk, meta_line);
|
||||
if (err) {
|
||||
pblk_err(pblk, "metadata I/O submission failed: %d",
|
||||
err);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
return NVM_IO_OK;
|
||||
}
|
||||
|
||||
static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio = rqd->bio;
|
||||
|
||||
if (c_ctx->nr_padded)
|
||||
pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid,
|
||||
c_ctx->nr_padded);
|
||||
}
|
||||
|
||||
static int pblk_submit_write(struct pblk *pblk, int *secs_left)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct nvm_rq *rqd;
|
||||
unsigned int secs_avail, secs_to_sync, secs_to_com;
|
||||
unsigned int secs_to_flush, packed_meta_pgs;
|
||||
unsigned long pos;
|
||||
unsigned int resubmit;
|
||||
|
||||
*secs_left = 0;
|
||||
|
||||
spin_lock(&pblk->resubmit_lock);
|
||||
resubmit = !list_empty(&pblk->resubmit_list);
|
||||
spin_unlock(&pblk->resubmit_lock);
|
||||
|
||||
/* Resubmit failed writes first */
|
||||
if (resubmit) {
|
||||
struct pblk_c_ctx *r_ctx;
|
||||
|
||||
spin_lock(&pblk->resubmit_lock);
|
||||
r_ctx = list_first_entry(&pblk->resubmit_list,
|
||||
struct pblk_c_ctx, list);
|
||||
list_del(&r_ctx->list);
|
||||
spin_unlock(&pblk->resubmit_lock);
|
||||
|
||||
secs_avail = r_ctx->nr_valid;
|
||||
pos = r_ctx->sentry;
|
||||
|
||||
pblk_prepare_resubmit(pblk, pos, secs_avail);
|
||||
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
|
||||
secs_avail);
|
||||
|
||||
kfree(r_ctx);
|
||||
} else {
|
||||
/* If there are no sectors in the cache,
|
||||
* flushes (bios without data) will be cleared on
|
||||
* the cache threads
|
||||
*/
|
||||
secs_avail = pblk_rb_read_count(&pblk->rwb);
|
||||
if (!secs_avail)
|
||||
return 0;
|
||||
|
||||
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
|
||||
if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
|
||||
return 0;
|
||||
|
||||
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
|
||||
secs_to_flush);
|
||||
if (secs_to_sync > pblk->max_write_pgs) {
|
||||
pblk_err(pblk, "bad buffer sync calculation\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
secs_to_com = (secs_to_sync > secs_avail) ?
|
||||
secs_avail : secs_to_sync;
|
||||
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
|
||||
}
|
||||
|
||||
packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
|
||||
bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
|
||||
|
||||
rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
|
||||
rqd->bio = bio;
|
||||
|
||||
if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync,
|
||||
secs_avail)) {
|
||||
pblk_err(pblk, "corrupted write bio\n");
|
||||
goto fail_put_bio;
|
||||
}
|
||||
|
||||
if (pblk_submit_io_set(pblk, rqd))
|
||||
goto fail_free_bio;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
atomic_long_add(secs_to_sync, &pblk->sub_writes);
|
||||
#endif
|
||||
|
||||
*secs_left = 1;
|
||||
return 0;
|
||||
|
||||
fail_free_bio:
|
||||
pblk_free_write_rqd(pblk, rqd);
|
||||
fail_put_bio:
|
||||
bio_put(bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
int pblk_write_ts(void *data)
|
||||
{
|
||||
struct pblk *pblk = data;
|
||||
int secs_left;
|
||||
int write_failure = 0;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
if (!write_failure) {
|
||||
write_failure = pblk_submit_write(pblk, &secs_left);
|
||||
|
||||
if (secs_left)
|
||||
continue;
|
||||
}
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
io_schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
@ -1329,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
struct raid1_plug_cb *plug = NULL;
|
||||
int first_clone;
|
||||
int max_sectors;
|
||||
bool write_behind = false;
|
||||
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
@ -1381,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
max_sectors = r1_bio->sectors;
|
||||
for (i = 0; i < disks; i++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
||||
|
||||
/*
|
||||
* The write-behind io is only attempted on drives marked as
|
||||
* write-mostly, which means we could allocate write behind
|
||||
* bio later.
|
||||
*/
|
||||
if (rdev && test_bit(WriteMostly, &rdev->flags))
|
||||
write_behind = true;
|
||||
|
||||
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
blocked_rdev = rdev;
|
||||
@ -1454,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
goto retry_write;
|
||||
}
|
||||
|
||||
/*
|
||||
* When using a bitmap, we may call alloc_behind_master_bio below.
|
||||
* alloc_behind_master_bio allocates a copy of the data payload a page
|
||||
* at a time and thus needs a new bio that can fit the whole payload
|
||||
* this bio in page sized chunks.
|
||||
*/
|
||||
if (write_behind && bitmap)
|
||||
max_sectors = min_t(int, max_sectors,
|
||||
BIO_MAX_VECS * (PAGE_SIZE >> 9));
|
||||
if (max_sectors < bio_sectors(bio)) {
|
||||
struct bio *split = bio_split(bio, max_sectors,
|
||||
GFP_NOIO, &conf->bio_split);
|
||||
|
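As a rough worked example of the clamp a few lines above (assuming BIO_MAX_VECS is 256 and 4 KiB pages, typical values rather than guarantees): a write-behind bio is limited to 256 * (4096 >> 9) = 2048 sectors, i.e. 1 MiB, so larger writes are split before alloc_behind_master_bio copies the payload page by page.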
@ -1712,6 +1712,11 @@ retry_discard:
|
||||
} else
|
||||
r10_bio->master_bio = (struct bio *)first_r10bio;
|
||||
|
||||
/*
|
||||
* first select target devices under rcu_lock and
|
||||
* inc refcount on their rdev. Record them by setting
|
||||
* bios[x] to bio
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (disk = 0; disk < geo->raid_disks; disk++) {
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
@ -1743,9 +1748,6 @@ retry_discard:
|
||||
for (disk = 0; disk < geo->raid_disks; disk++) {
|
||||
sector_t dev_start, dev_end;
|
||||
struct bio *mbio, *rbio = NULL;
|
||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
struct md_rdev *rrdev = rcu_dereference(
|
||||
conf->mirrors[disk].replacement);
|
||||
|
||||
/*
|
||||
* Now start to calculate the start and end address for each disk.
|
||||
@ -1775,9 +1777,12 @@ retry_discard:
|
||||
|
||||
/*
|
||||
* It only handles discard bio which size is >= stripe size, so
|
||||
* dev_end > dev_start all the time
|
||||
* dev_end > dev_start all the time.
|
||||
* It doesn't need to use rcu lock to get rdev here. We already
|
||||
* add rdev->nr_pending in the first loop.
|
||||
*/
|
||||
if (r10_bio->devs[disk].bio) {
|
||||
struct md_rdev *rdev = conf->mirrors[disk].rdev;
|
||||
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
|
||||
mbio->bi_end_io = raid10_end_discard_request;
|
||||
mbio->bi_private = r10_bio;
|
||||
@ -1790,6 +1795,7 @@ retry_discard:
|
||||
bio_endio(mbio);
|
||||
}
|
||||
if (r10_bio->devs[disk].repl_bio) {
|
||||
struct md_rdev *rrdev = conf->mirrors[disk].replacement;
|
||||
rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
|
||||
rbio->bi_end_io = raid10_end_discard_request;
|
||||
rbio->bi_private = r10_bio;
|
||||
|
@ -33,12 +33,12 @@ config NVME_HWMON
|
||||
in the system.
|
||||
|
||||
config NVME_FABRICS
|
||||
select NVME_CORE
|
||||
tristate
|
||||
|
||||
config NVME_RDMA
|
||||
tristate "NVM Express over Fabrics RDMA host driver"
|
||||
depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
|
||||
select NVME_CORE
|
||||
select NVME_FABRICS
|
||||
select SG_POOL
|
||||
help
|
||||
@ -55,7 +55,6 @@ config NVME_FC
|
||||
tristate "NVM Express over Fabrics FC host driver"
|
||||
depends on BLOCK
|
||||
depends on HAS_DMA
|
||||
select NVME_CORE
|
||||
select NVME_FABRICS
|
||||
select SG_POOL
|
||||
help
|
||||
@ -72,7 +71,6 @@ config NVME_TCP
|
||||
tristate "NVM Express over Fabrics TCP host driver"
|
||||
depends on INET
|
||||
depends on BLOCK
|
||||
select NVME_CORE
|
||||
select NVME_FABRICS
|
||||
select CRYPTO
|
||||
select CRYPTO_CRC32C
|
||||
|
@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
|
||||
nvme-core-y := core.o ioctl.o
|
||||
nvme-core-$(CONFIG_TRACING) += trace.o
|
||||
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
|
||||
nvme-core-$(CONFIG_NVM) += lightnvm.o
|
||||
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
|
||||
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
|
||||
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
|
||||
|
@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref)
|
||||
{
|
||||
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
|
||||
|
||||
if (ns->ndev)
|
||||
nvme_nvm_unregister(ns);
|
||||
|
||||
put_disk(ns->disk);
|
||||
nvme_put_ns_head(ns->head);
|
||||
nvme_put_ctrl(ns->ctrl);
|
||||
@ -1028,7 +1025,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
cmd->common.command_id = req->tag;
|
||||
nvme_req(req)->genctr++;
|
||||
cmd->common.command_id = nvme_cid(req);
|
||||
trace_nvme_setup_cmd(req, cmd);
|
||||
return ret;
|
||||
}
|
||||
@ -3217,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = {
|
||||
|
||||
const struct attribute_group *nvme_ns_id_attr_groups[] = {
|
||||
&nvme_ns_id_attr_group,
|
||||
#ifdef CONFIG_NVM
|
||||
&nvme_nvm_attr_group,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -3762,13 +3757,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
if (nvme_update_ns_info(ns, id))
|
||||
goto out_unlink_ns;
|
||||
|
||||
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
|
||||
if (nvme_nvm_register(ns, disk->disk_name, node)) {
|
||||
dev_warn(ctrl->device, "LightNVM init failure\n");
|
||||
goto out_unlink_ns;
|
||||
}
|
||||
}
|
||||
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_add_tail(&ns->list, &ctrl->namespaces);
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
|
@ -719,7 +719,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
nvmf_host_put(opts->host);
|
||||
opts->host = nvmf_host_add(p);
|
||||
kfree(p);
|
||||
if (!opts->host) {
|
||||
|
@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
|
||||
case NVME_IOCTL_IO64_CMD:
|
||||
return nvme_user_cmd64(ns->ctrl, ns, argp);
|
||||
default:
|
||||
if (!ns->ndev)
|
||||
return -ENOTTY;
|
||||
return nvme_nvm_ioctl(ns, cmd, argp);
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
@ -11,7 +11,6 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/lightnvm.h>
|
||||
#include <linux/sed-opal.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/rcupdate.h>
|
||||
@ -48,11 +47,6 @@ extern struct workqueue_struct *nvme_wq;
|
||||
extern struct workqueue_struct *nvme_reset_wq;
|
||||
extern struct workqueue_struct *nvme_delete_wq;
|
||||
|
||||
enum {
|
||||
NVME_NS_LBA = 0,
|
||||
NVME_NS_LIGHTNVM = 1,
|
||||
};
|
||||
|
||||
/*
|
||||
* List of workarounds for devices that required behavior not specified in
|
||||
* the standard.
|
||||
@ -92,11 +86,6 @@ enum nvme_quirks {
|
||||
*/
|
||||
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
|
||||
|
||||
/*
|
||||
* Supports the LighNVM command set if indicated in vs[1].
|
||||
*/
|
||||
NVME_QUIRK_LIGHTNVM = (1 << 6),
|
||||
|
||||
/*
|
||||
* Set MEDIUM priority on SQ creation
|
||||
*/
|
||||
@ -158,6 +147,7 @@ enum nvme_quirks {
|
||||
struct nvme_request {
|
||||
struct nvme_command *cmd;
|
||||
union nvme_result result;
|
||||
u8 genctr;
|
||||
u8 retries;
|
||||
u8 flags;
|
||||
u16 status;
|
||||
@ -449,7 +439,6 @@ struct nvme_ns {
|
||||
u32 ana_grpid;
|
||||
#endif
|
||||
struct list_head siblings;
|
||||
struct nvm_dev *ndev;
|
||||
struct kref kref;
|
||||
struct nvme_ns_head *head;
|
||||
|
||||
@ -497,6 +486,49 @@ struct nvme_ctrl_ops {
|
||||
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
|
||||
};
|
||||
|
||||
/*
 * nvme command_id is constructed as such:
 * | xxxx | xxxxxxxxxxxx |
 *   gen    request tag
 */
#define nvme_genctr_mask(gen)		(gen & 0xf)
#define nvme_cid_install_genctr(gen)	(nvme_genctr_mask(gen) << 12)
#define nvme_genctr_from_cid(cid)	((cid & 0xf000) >> 12)
#define nvme_tag_from_cid(cid)		(cid & 0xfff)

static inline u16 nvme_cid(struct request *rq)
{
	return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
}

static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
		u16 command_id)
{
	u8 genctr = nvme_genctr_from_cid(command_id);
	u16 tag = nvme_tag_from_cid(command_id);
	struct request *rq;

	rq = blk_mq_tag_to_rq(tags, tag);
	if (unlikely(!rq)) {
		pr_err("could not locate request for tag %#x\n",
			tag);
		return NULL;
	}
	if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) {
		dev_err(nvme_req(rq)->ctrl->device,
			"request %#x genctr mismatch (got %#x expected %#x)\n",
			tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
		return NULL;
	}
	return rq;
}

static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
		u16 command_id)
{
	return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
}
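The helpers above pack a 4-bit generation counter into the top nibble of the 16-bit command_id, leaving 12 bits for the blk-mq tag, so a completion that arrives after the tag has been recycled can be caught by a generation mismatch rather than being matched to the wrong request. A minimal standalone C sketch of the same packing, assuming only the 4/12-bit split shown in the comment (names are illustrative, not the kernel API):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative re-implementation of the 4-bit gen / 12-bit tag split. */
static uint16_t cid_pack(uint8_t genctr, uint16_t tag)
{
	return (uint16_t)(((genctr & 0xf) << 12) | (tag & 0xfff));
}

static int cid_matches(uint16_t cid, uint8_t current_genctr)
{
	return ((cid & 0xf000) >> 12) == (current_genctr & 0xf);
}

int main(void)
{
	uint16_t cid = cid_pack(0x3, 0x2a);	/* generation 3, tag 0x2a */

	assert((cid & 0xfff) == 0x2a);
	assert(cid_matches(cid, 0x3));		/* same generation: accept */
	assert(!cid_matches(cid, 0x4));		/* tag reused: stale completion */
	printf("command_id %#x -> tag %#x, gen %u\n",
	       cid, cid & 0xfff, (cid & 0xf000) >> 12);
	return 0;
}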
|
||||
|
||||
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
|
||||
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
|
||||
const char *dev_name);
|
||||
@ -594,7 +626,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
|
||||
|
||||
static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
|
||||
{
|
||||
return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
|
||||
return !qid &&
|
||||
nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
|
||||
}
|
||||
|
||||
void nvme_complete_rq(struct request *req);
|
||||
@ -823,26 +856,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVM
|
||||
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
|
||||
void nvme_nvm_unregister(struct nvme_ns *ns);
|
||||
extern const struct attribute_group nvme_nvm_attr_group;
|
||||
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
|
||||
#else
|
||||
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
|
||||
int node)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
|
||||
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
|
||||
void __user *argp)
|
||||
{
|
||||
return -ENOTTY;
|
||||
}
|
||||
#endif /* CONFIG_NVM */
|
||||
|
||||
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
|
||||
{
|
||||
return dev_to_disk(dev)->private_data;
|
||||
|
@ -60,6 +60,8 @@ MODULE_PARM_DESC(sgl_threshold,
|
||||
"Use SGLs when average request segment size is larger or equal to "
|
||||
"this size. Use 0 to disable SGLs.");
|
||||
|
||||
#define NVME_PCI_MIN_QUEUE_SIZE 2
|
||||
#define NVME_PCI_MAX_QUEUE_SIZE 4095
|
||||
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
|
||||
static const struct kernel_param_ops io_queue_depth_ops = {
|
||||
.set = io_queue_depth_set,
|
||||
@ -68,7 +70,7 @@ static const struct kernel_param_ops io_queue_depth_ops = {
|
||||
|
||||
static unsigned int io_queue_depth = 1024;
|
||||
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
|
||||
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
|
||||
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
|
||||
|
||||
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
|
||||
{
|
||||
@ -135,6 +137,7 @@ struct nvme_dev {
|
||||
u32 cmbloc;
|
||||
struct nvme_ctrl ctrl;
|
||||
u32 last_ps;
|
||||
bool hmb;
|
||||
|
||||
mempool_t *iod_mempool;
|
||||
|
||||
@ -153,18 +156,14 @@ struct nvme_dev {
|
||||
unsigned int nr_allocated_queues;
|
||||
unsigned int nr_write_queues;
|
||||
unsigned int nr_poll_queues;
|
||||
|
||||
bool attrs_added;
|
||||
};
|
||||
|
||||
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
|
||||
{
|
||||
int ret;
|
||||
u32 n;
|
||||
|
||||
ret = kstrtou32(val, 10, &n);
|
||||
if (ret != 0 || n < 2)
|
||||
return -EINVAL;
|
||||
|
||||
return param_set_uint(val, kp);
|
||||
return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
|
||||
NVME_PCI_MAX_QUEUE_SIZE);
|
||||
}
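The rewritten setter above delegates the range check to param_set_uint_minmax(), rejecting anything outside [NVME_PCI_MIN_QUEUE_SIZE, NVME_PCI_MAX_QUEUE_SIZE]. A small standalone sketch of the same 2..4095 bounds check, using plain strtoul instead of the kernel's param helpers, purely for illustration:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MIN_QUEUE_SIZE 2	/* mirrors NVME_PCI_MIN_QUEUE_SIZE */
#define MAX_QUEUE_SIZE 4095	/* mirrors NVME_PCI_MAX_QUEUE_SIZE */

/* Parse an io_queue_depth-style value and enforce the allowed range. */
static int parse_queue_depth(const char *val, unsigned long *out)
{
	char *end;
	unsigned long n = strtoul(val, &end, 10);

	if (end == val || *end != '\0')
		return -EINVAL;		/* not a number */
	if (n < MIN_QUEUE_SIZE || n > MAX_QUEUE_SIZE)
		return -EINVAL;		/* out of range, as param_set_uint_minmax would report */
	*out = n;
	return 0;
}

int main(void)
{
	unsigned long depth;

	printf("1024 -> %d\n", parse_queue_depth("1024", &depth));	/* 0 */
	printf("1    -> %d\n", parse_queue_depth("1", &depth));	/* -EINVAL */
	printf("4096 -> %d\n", parse_queue_depth("4096", &depth));	/* -EINVAL */
	return 0;
}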
|
||||
|
||||
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
|
||||
@ -1014,7 +1013,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
|
||||
return;
|
||||
}
|
||||
|
||||
req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
|
||||
req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
|
||||
if (unlikely(!req)) {
|
||||
dev_warn(nvmeq->dev->ctrl.device,
|
||||
"invalid id %d completed on queue %d\n",
|
||||
@ -1808,17 +1807,6 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
|
||||
return ret >= 0 ? 0 : ret;
|
||||
}
|
||||
|
||||
static ssize_t nvme_cmb_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n",
|
||||
ndev->cmbloc, ndev->cmbsz);
|
||||
}
|
||||
static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
|
||||
|
||||
static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
|
||||
{
|
||||
u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
|
||||
@ -1887,20 +1875,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
|
||||
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
|
||||
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
|
||||
pci_p2pmem_publish(pdev, true);
|
||||
|
||||
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
|
||||
&dev_attr_cmb.attr, NULL))
|
||||
dev_warn(dev->ctrl.device,
|
||||
"failed to add sysfs attribute for CMB\n");
|
||||
}
|
||||
|
||||
static inline void nvme_release_cmb(struct nvme_dev *dev)
|
||||
{
|
||||
if (dev->cmb_size) {
|
||||
sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
|
||||
&dev_attr_cmb.attr, NULL);
|
||||
dev->cmb_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
|
||||
@ -1923,7 +1897,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
|
||||
dev_warn(dev->ctrl.device,
|
||||
"failed to set host mem (err %d, flags %#x).\n",
|
||||
ret, bits);
|
||||
}
|
||||
} else
|
||||
dev->hmb = bits & NVME_HOST_MEM_ENABLE;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2080,6 +2056,102 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t cmb_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
||||
|
||||
return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz : x%08x\n",
|
||||
ndev->cmbloc, ndev->cmbsz);
|
||||
}
|
||||
static DEVICE_ATTR_RO(cmb);
|
||||
|
||||
static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
||||
|
||||
return sysfs_emit(buf, "%u\n", ndev->cmbloc);
|
||||
}
|
||||
static DEVICE_ATTR_RO(cmbloc);
|
||||
|
||||
static ssize_t cmbsz_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
||||
|
||||
return sysfs_emit(buf, "%u\n", ndev->cmbsz);
|
||||
}
|
||||
static DEVICE_ATTR_RO(cmbsz);
|
||||
|
||||
static ssize_t hmb_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));

	return sysfs_emit(buf, "%d\n", ndev->hmb);
}

static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
	bool new;
	int ret;

	if (strtobool(buf, &new) < 0)
		return -EINVAL;

	if (new == ndev->hmb)
		return count;

	if (new) {
		ret = nvme_setup_host_mem(ndev);
	} else {
		ret = nvme_set_host_mem(ndev, 0);
		if (!ret)
			nvme_free_host_mem(ndev);
	}

	if (ret < 0)
		return ret;

	return count;
}
static DEVICE_ATTR_RW(hmb);
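Once this attribute is registered through the nvme_pci_attr_group further below, the host memory buffer can be toggled at runtime from sysfs; assuming the controller's class device is nvme0, writing 0 to /sys/class/nvme/nvme0/hmb releases the buffer and writing 1 re-enables it (the path is given for illustration; the attribute is only visible when the controller advertises HMPRE).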
|
||||
|
||||
static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
|
||||
struct attribute *a, int n)
|
||||
{
|
||||
struct nvme_ctrl *ctrl =
|
||||
dev_get_drvdata(container_of(kobj, struct device, kobj));
|
||||
struct nvme_dev *dev = to_nvme_dev(ctrl);
|
||||
|
||||
if (a == &dev_attr_cmb.attr ||
|
||||
a == &dev_attr_cmbloc.attr ||
|
||||
a == &dev_attr_cmbsz.attr) {
|
||||
if (!dev->cmbsz)
|
||||
return 0;
|
||||
}
|
||||
if (a == &dev_attr_hmb.attr && !ctrl->hmpre)
|
||||
return 0;
|
||||
|
||||
return a->mode;
|
||||
}
|
||||
|
||||
static struct attribute *nvme_pci_attrs[] = {
|
||||
&dev_attr_cmb.attr,
|
||||
&dev_attr_cmbloc.attr,
|
||||
&dev_attr_cmbsz.attr,
|
||||
&dev_attr_hmb.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group nvme_pci_attr_group = {
|
||||
.attrs = nvme_pci_attrs,
|
||||
.is_visible = nvme_pci_attrs_are_visible,
|
||||
};
|
||||
|
||||
/*
|
||||
* nirqs is the number of interrupts available for write and read
|
||||
* queues. The core already reserved an interrupt for the admin queue.
|
||||
@ -2751,6 +2823,10 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
|
||||
&nvme_pci_attr_group))
|
||||
dev->attrs_added = true;
|
||||
|
||||
nvme_start_ctrl(&dev->ctrl);
|
||||
return;
|
||||
|
||||
@ -2999,6 +3075,13 @@ static void nvme_shutdown(struct pci_dev *pdev)
|
||||
nvme_disable_prepare_reset(dev, true);
|
||||
}
|
||||
|
||||
static void nvme_remove_attrs(struct nvme_dev *dev)
|
||||
{
|
||||
if (dev->attrs_added)
|
||||
sysfs_remove_group(&dev->ctrl.device->kobj,
|
||||
&nvme_pci_attr_group);
|
||||
}
|
||||
|
||||
/*
|
||||
* The driver's remove may be called on a device in a partially initialized
|
||||
* state. This function must not have any dependencies on the device state in
|
||||
@ -3020,7 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev)
|
||||
nvme_stop_ctrl(&dev->ctrl);
|
||||
nvme_remove_namespaces(&dev->ctrl);
|
||||
nvme_dev_disable(dev, true);
|
||||
nvme_release_cmb(dev);
|
||||
nvme_remove_attrs(dev);
|
||||
nvme_free_host_mem(dev);
|
||||
nvme_dev_remove_admin(dev);
|
||||
nvme_free_queues(dev, 0);
|
||||
@ -3047,8 +3130,13 @@ static int nvme_resume(struct device *dev)
|
||||
|
||||
if (ndev->last_ps == U32_MAX ||
|
||||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
|
||||
return nvme_try_sched_reset(&ndev->ctrl);
|
||||
goto reset;
|
||||
if (ctrl->hmpre && nvme_setup_host_mem(ndev))
|
||||
goto reset;
|
||||
|
||||
return 0;
|
||||
reset:
|
||||
return nvme_try_sched_reset(ctrl);
|
||||
}
|
||||
|
||||
static int nvme_suspend(struct device *dev)
|
||||
@ -3072,15 +3160,9 @@ static int nvme_suspend(struct device *dev)
|
||||
* the PCI bus layer to put it into D3 in order to take the PCIe link
|
||||
* down, so as to allow the platform to achieve its minimum low-power
|
||||
* state (which may not be possible if the link is up).
|
||||
*
|
||||
* If a host memory buffer is enabled, shut down the device as the NVMe
|
||||
* specification allows the device to access the host memory buffer in
|
||||
* host DRAM from all power states, but hosts will fail access to DRAM
|
||||
* during S3.
|
||||
*/
|
||||
if (pm_suspend_via_firmware() || !ctrl->npss ||
|
||||
!pcie_aspm_enabled(pdev) ||
|
||||
ndev->nr_host_mem_descs ||
|
||||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
|
||||
return nvme_disable_prepare_reset(ndev, true);
|
||||
|
||||
@ -3091,6 +3173,17 @@ static int nvme_suspend(struct device *dev)
|
||||
if (ctrl->state != NVME_CTRL_LIVE)
|
||||
goto unfreeze;
|
||||
|
||||
/*
|
||||
* Host memory access may not be successful in a system suspend state,
|
||||
* but the specification allows the controller to access memory in a
|
||||
* non-operational power state.
|
||||
*/
|
||||
if (ndev->hmb) {
|
||||
ret = nvme_set_host_mem(ndev, 0);
|
||||
if (ret < 0)
|
||||
goto unfreeze;
|
||||
}
|
||||
|
||||
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
|
||||
if (ret < 0)
|
||||
goto unfreeze;
|
||||
@ -3243,12 +3336,6 @@ static const struct pci_device_id nvme_id_table[] = {
|
||||
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
|
||||
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
|
||||
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
||||
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
|
||||
.driver_data = NVME_QUIRK_LIGHTNVM, },
|
||||
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
|
||||
.driver_data = NVME_QUIRK_LIGHTNVM, },
|
||||
{ PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
|
||||
.driver_data = NVME_QUIRK_LIGHTNVM, },
|
||||
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
|
||||
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
||||
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
|
||||
|
@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ctrl->ctrl.queue_count = nr_io_queues + 1;
|
||||
if (ctrl->ctrl.queue_count < 2) {
|
||||
if (nr_io_queues == 0) {
|
||||
dev_err(ctrl->ctrl.device,
|
||||
"unable to set any I/O queues\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ctrl->ctrl.queue_count = nr_io_queues + 1;
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"creating %d I/O queues.\n", nr_io_queues);
|
||||
|
||||
@ -1730,10 +1730,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
|
||||
struct request *rq;
|
||||
struct nvme_rdma_request *req;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
|
||||
rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"tag 0x%x on QP %#x not found\n",
|
||||
"got bad command_id %#x on QP %#x\n",
|
||||
cqe->command_id, queue->qp->qp_num);
|
||||
nvme_rdma_error_recovery(queue->ctrl);
|
||||
return;
|
||||
|
@ -487,11 +487,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
|
||||
{
|
||||
struct request *rq;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
|
||||
rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"queue %d tag 0x%x not found\n",
|
||||
nvme_tcp_queue_id(queue), cqe->command_id);
|
||||
"got bad cqe.command_id %#x on queue %d\n",
|
||||
cqe->command_id, nvme_tcp_queue_id(queue));
|
||||
nvme_tcp_error_recovery(&queue->ctrl->ctrl);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -508,11 +508,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
|
||||
{
|
||||
struct request *rq;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"queue %d tag %#x not found\n",
|
||||
nvme_tcp_queue_id(queue), pdu->command_id);
|
||||
"got bad c2hdata.command_id %#x on queue %d\n",
|
||||
pdu->command_id, nvme_tcp_queue_id(queue));
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
@ -606,7 +606,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
|
||||
data->hdr.plen =
|
||||
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
|
||||
data->ttag = pdu->ttag;
|
||||
data->command_id = rq->tag;
|
||||
data->command_id = nvme_cid(rq);
|
||||
data->data_offset = cpu_to_le32(req->data_sent);
|
||||
data->data_length = cpu_to_le32(req->pdu_len);
|
||||
return 0;
|
||||
@ -619,11 +619,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
|
||||
struct request *rq;
|
||||
int ret;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"queue %d tag %#x not found\n",
|
||||
nvme_tcp_queue_id(queue), pdu->command_id);
|
||||
"got bad r2t.command_id %#x on queue %d\n",
|
||||
pdu->command_id, nvme_tcp_queue_id(queue));
|
||||
return -ENOENT;
|
||||
}
|
||||
req = blk_mq_rq_to_pdu(rq);
|
||||
@ -702,17 +702,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
|
||||
unsigned int *offset, size_t *len)
|
||||
{
|
||||
struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
|
||||
struct nvme_tcp_request *req;
|
||||
struct request *rq;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"queue %d tag %#x not found\n",
|
||||
nvme_tcp_queue_id(queue), pdu->command_id);
|
||||
return -ENOENT;
|
||||
}
|
||||
req = blk_mq_rq_to_pdu(rq);
|
||||
struct request *rq =
|
||||
nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
|
||||
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
while (true) {
|
||||
int recv_len, ret;
|
||||
@ -804,8 +796,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
|
||||
}
|
||||
|
||||
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
|
||||
struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
|
||||
pdu->command_id);
|
||||
struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
|
||||
pdu->command_id);
|
||||
|
||||
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
|
||||
queue->nr_cqe++;
|
||||
@ -1228,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
|
||||
|
||||
sock_release(queue->sock);
|
||||
kfree(queue->pdu);
|
||||
mutex_destroy(&queue->send_mutex);
|
||||
mutex_destroy(&queue->queue_lock);
|
||||
}
|
||||
|
||||
@ -1533,6 +1526,7 @@ err_sock:
|
||||
sock_release(queue->sock);
|
||||
queue->sock = NULL;
|
||||
err_destroy_mutex:
|
||||
mutex_destroy(&queue->send_mutex);
|
||||
mutex_destroy(&queue->queue_lock);
|
||||
return ret;
|
||||
}
|
||||
@ -1769,13 +1763,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ctrl->queue_count = nr_io_queues + 1;
|
||||
if (ctrl->queue_count < 2) {
|
||||
if (nr_io_queues == 0) {
|
||||
dev_err(ctrl->device,
|
||||
"unable to set any I/O queues\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ctrl->queue_count = nr_io_queues + 1;
|
||||
dev_info(ctrl->device,
|
||||
"creating %d I/O queues.\n", nr_io_queues);
|
||||
|
||||
|
@ -72,6 +72,20 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_admin_set_features(struct trace_seq *p,
						 u8 *cdw10)
{
	const char *ret = trace_seq_buffer_ptr(p);
	u8 fid = cdw10[0];
	u8 sv = cdw10[3] & 0x8;
	u32 cdw11 = get_unaligned_le32(cdw10 + 4);

	trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
	trace_seq_putc(p, 0);

	return ret;
}
|
||||
|
||||
static const char *nvme_trace_admin_get_features(struct trace_seq *p,
|
||||
u8 *cdw10)
|
||||
{
|
||||
@ -80,7 +94,7 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p,
|
||||
u8 sel = cdw10[1] & 0x7;
|
||||
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
|
||||
|
||||
trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
|
||||
trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
@ -201,6 +215,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
|
||||
return nvme_trace_create_cq(p, cdw10);
|
||||
case nvme_admin_identify:
|
||||
return nvme_trace_admin_identify(p, cdw10);
|
||||
case nvme_admin_set_features:
|
||||
return nvme_trace_admin_set_features(p, cdw10);
|
||||
case nvme_admin_get_features:
|
||||
return nvme_trace_admin_get_features(p, cdw10);
|
||||
case nvme_admin_get_lba_status:
|
||||
|
@ -31,7 +31,6 @@ config NVME_TARGET_PASSTHRU
|
||||
config NVME_TARGET_LOOP
|
||||
tristate "NVMe loopback device support"
|
||||
depends on NVME_TARGET
|
||||
select NVME_CORE
|
||||
select NVME_FABRICS
|
||||
select SG_POOL
|
||||
help
|
||||
@ -65,7 +64,6 @@ config NVME_TARGET_FC
|
||||
config NVME_TARGET_FCLOOP
|
||||
tristate "NVMe over Fabrics FC Transport Loopback Test driver"
|
||||
depends on NVME_TARGET
|
||||
select NVME_CORE
|
||||
select NVME_FABRICS
|
||||
select SG_POOL
|
||||
depends on NVME_FC
|
||||
|
@ -802,6 +802,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
|
||||
* controller teardown as a result of a keep-alive expiration.
|
||||
*/
|
||||
ctrl->reset_tbkas = true;
|
||||
sq->ctrl->sqs[sq->qid] = NULL;
|
||||
nvmet_ctrl_put(ctrl);
|
||||
sq->ctrl = NULL; /* allows reusing the queue later */
|
||||
}
|
||||
|
@ -109,20 +109,37 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
|
||||
u16 qid = le16_to_cpu(c->qid);
|
||||
u16 sqsize = le16_to_cpu(c->sqsize);
|
||||
struct nvmet_ctrl *old;
|
||||
u16 mqes = NVME_CAP_MQES(ctrl->cap);
|
||||
u16 ret;
|
||||
|
||||
if (!sqsize) {
|
||||
pr_warn("queue size zero!\n");
|
||||
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
|
||||
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
|
||||
ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ctrl->sqs[qid] != NULL) {
|
||||
pr_warn("qid %u has already been created\n", qid);
|
||||
req->error_loc = offsetof(struct nvmf_connect_command, qid);
|
||||
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
|
||||
}
|
||||
|
||||
if (sqsize > mqes) {
|
||||
pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
|
||||
sqsize, mqes, ctrl->cntlid);
|
||||
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
|
||||
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
|
||||
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
|
||||
}
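As a worked example of the new check (illustrative numbers only): both MQES and sqsize are 0's-based, so a controller reporting MQES = 127 supports submission queues of 128 entries; a connect command carrying sqsize = 127 is accepted, while sqsize = 128 now fails with NVME_SC_CONNECT_INVALID_PARAM instead of being installed with a queue deeper than the controller advertises.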
|
||||
|
||||
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
|
||||
if (old) {
|
||||
pr_warn("queue already connected!\n");
|
||||
req->error_loc = offsetof(struct nvmf_connect_command, opcode);
|
||||
return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
|
||||
}
|
||||
if (!sqsize) {
|
||||
pr_warn("queue size zero!\n");
|
||||
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
|
||||
ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* note: convert queue size from 0's-based value to 1's-based value */
|
||||
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
|
||||
@ -138,6 +155,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
|
||||
if (ret) {
|
||||
pr_err("failed to install queue %d cntlid %d ret %x\n",
|
||||
qid, ctrl->cntlid, ret);
|
||||
ctrl->sqs[qid] = NULL;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
@ -260,11 +278,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
|
||||
}
|
||||
|
||||
status = nvmet_install_queue(ctrl, req);
|
||||
if (status) {
|
||||
/* pass back cntlid that had the issue of installing queue */
|
||||
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
|
||||
if (status)
|
||||
goto out_ctrl_put;
|
||||
}
|
||||
|
||||
/* pass back cntlid for successful completion */
|
||||
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
|
||||
|
||||
pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
|
||||
|
||||
|
@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
|
||||
} else {
|
||||
struct request *rq;
|
||||
|
||||
rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
|
||||
rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
|
||||
if (!rq) {
|
||||
dev_err(queue->ctrl->ctrl.device,
|
||||
"tag 0x%x on queue %d not found\n",
|
||||
"got bad command_id %#x on queue %d\n",
|
||||
cqe->command_id, nvme_loop_queue_idx(queue));
|
||||
return;
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
|
||||
u8 sel = cdw10[1] & 0x7;
|
||||
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
|
||||
|
||||
trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
|
||||
trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
@ -49,6 +49,20 @@ static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvmet_trace_admin_set_features(struct trace_seq *p,
|
||||
u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u8 fid = cdw10[0];
|
||||
u8 sv = cdw10[3] & 0x8;
|
||||
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
|
||||
|
||||
trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
@ -94,6 +108,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
|
||||
switch (opcode) {
|
||||
case nvme_admin_identify:
|
||||
return nvmet_trace_admin_identify(p, cdw10);
|
||||
case nvme_admin_set_features:
|
||||
return nvmet_trace_admin_set_features(p, cdw10);
|
||||
case nvme_admin_get_features:
|
||||
return nvmet_trace_admin_get_features(p, cdw10);
|
||||
case nvme_admin_get_lba_status:
|
||||
|
@ -115,14 +115,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
|
||||
}
|
||||
|
||||
status = nvmet_req_find_ns(req);
|
||||
if (status) {
|
||||
status = NVME_SC_INTERNAL;
|
||||
if (status)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!bdev_is_zoned(req->ns->bdev)) {
|
||||
req->error_loc = offsetof(struct nvme_identify, nsid);
|
||||
status = NVME_SC_INVALID_NS | NVME_SC_DNR;
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -1,697 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef NVM_H
|
||||
#define NVM_H
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/types.h>
|
||||
#include <uapi/linux/lightnvm.h>
|
||||
|
||||
enum {
|
||||
NVM_IO_OK = 0,
|
||||
NVM_IO_REQUEUE = 1,
|
||||
NVM_IO_DONE = 2,
|
||||
NVM_IO_ERR = 3,
|
||||
|
||||
NVM_IOTYPE_NONE = 0,
|
||||
NVM_IOTYPE_GC = 1,
|
||||
};
|
||||
|
||||
/* common format */
|
||||
#define NVM_GEN_CH_BITS (8)
|
||||
#define NVM_GEN_LUN_BITS (8)
|
||||
#define NVM_GEN_BLK_BITS (16)
|
||||
#define NVM_GEN_RESERVED (32)
|
||||
|
||||
/* 1.2 format */
|
||||
#define NVM_12_PG_BITS (16)
|
||||
#define NVM_12_PL_BITS (4)
|
||||
#define NVM_12_SEC_BITS (4)
|
||||
#define NVM_12_RESERVED (8)
|
||||
|
||||
/* 2.0 format */
|
||||
#define NVM_20_SEC_BITS (24)
|
||||
#define NVM_20_RESERVED (8)
|
||||
|
||||
enum {
|
||||
NVM_OCSSD_SPEC_12 = 12,
|
||||
NVM_OCSSD_SPEC_20 = 20,
|
||||
};
|
||||
|
||||
struct ppa_addr {
|
||||
/* Generic structure for all addresses */
|
||||
union {
|
||||
/* generic device format */
|
||||
struct {
|
||||
u64 ch : NVM_GEN_CH_BITS;
|
||||
u64 lun : NVM_GEN_LUN_BITS;
|
||||
u64 blk : NVM_GEN_BLK_BITS;
|
||||
u64 reserved : NVM_GEN_RESERVED;
|
||||
} a;
|
||||
|
||||
/* 1.2 device format */
|
||||
struct {
|
||||
u64 ch : NVM_GEN_CH_BITS;
|
||||
u64 lun : NVM_GEN_LUN_BITS;
|
||||
u64 blk : NVM_GEN_BLK_BITS;
|
||||
u64 pg : NVM_12_PG_BITS;
|
||||
u64 pl : NVM_12_PL_BITS;
|
||||
u64 sec : NVM_12_SEC_BITS;
|
||||
u64 reserved : NVM_12_RESERVED;
|
||||
} g;
|
||||
|
||||
/* 2.0 device format */
|
||||
struct {
|
||||
u64 grp : NVM_GEN_CH_BITS;
|
||||
u64 pu : NVM_GEN_LUN_BITS;
|
||||
u64 chk : NVM_GEN_BLK_BITS;
|
||||
u64 sec : NVM_20_SEC_BITS;
|
||||
u64 reserved : NVM_20_RESERVED;
|
||||
} m;
|
||||
|
||||
struct {
|
||||
u64 line : 63;
|
||||
u64 is_cached : 1;
|
||||
} c;
|
||||
|
||||
u64 ppa;
|
||||
};
|
||||
};
|
||||
|
||||
struct nvm_rq;
|
||||
struct nvm_id;
|
||||
struct nvm_dev;
|
||||
struct nvm_tgt_dev;
|
||||
struct nvm_chk_meta;
|
||||
|
||||
typedef int (nvm_id_fn)(struct nvm_dev *);
|
||||
typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
|
||||
typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
|
||||
typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
|
||||
struct nvm_chk_meta *);
|
||||
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *, void *);
|
||||
typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
|
||||
typedef void (nvm_destroy_dma_pool_fn)(void *);
|
||||
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
|
||||
dma_addr_t *);
|
||||
typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
|
||||
|
||||
struct nvm_dev_ops {
|
||||
nvm_id_fn *identity;
|
||||
nvm_op_bb_tbl_fn *get_bb_tbl;
|
||||
nvm_op_set_bb_fn *set_bb_tbl;
|
||||
|
||||
nvm_get_chk_meta_fn *get_chk_meta;
|
||||
|
||||
nvm_submit_io_fn *submit_io;
|
||||
|
||||
nvm_create_dma_pool_fn *create_dma_pool;
|
||||
nvm_destroy_dma_pool_fn *destroy_dma_pool;
|
||||
nvm_dev_dma_alloc_fn *dev_dma_alloc;
|
||||
nvm_dev_dma_free_fn *dev_dma_free;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVM
|
||||
|
||||
#include <linux/file.h>
|
||||
#include <linux/dmapool.h>
|
||||
|
||||
enum {
|
||||
/* HW Responsibilities */
|
||||
NVM_RSP_L2P = 1 << 0,
|
||||
NVM_RSP_ECC = 1 << 1,
|
||||
|
||||
/* Physical Adressing Mode */
|
||||
NVM_ADDRMODE_LINEAR = 0,
|
||||
NVM_ADDRMODE_CHANNEL = 1,
|
||||
|
||||
/* Plane programming mode for LUN */
|
||||
NVM_PLANE_SINGLE = 1,
|
||||
NVM_PLANE_DOUBLE = 2,
|
||||
NVM_PLANE_QUAD = 4,
|
||||
|
||||
/* Status codes */
|
||||
NVM_RSP_SUCCESS = 0x0,
|
||||
NVM_RSP_NOT_CHANGEABLE = 0x1,
|
||||
NVM_RSP_ERR_FAILWRITE = 0x40ff,
|
||||
NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
|
||||
NVM_RSP_ERR_FAILECC = 0x4281,
|
||||
NVM_RSP_ERR_FAILCRC = 0x4004,
|
||||
NVM_RSP_WARN_HIGHECC = 0x4700,
|
||||
|
||||
/* Device opcodes */
|
||||
NVM_OP_PWRITE = 0x91,
|
||||
NVM_OP_PREAD = 0x92,
|
||||
NVM_OP_ERASE = 0x90,
|
||||
|
||||
/* PPA Command Flags */
|
||||
NVM_IO_SNGL_ACCESS = 0x0,
|
||||
NVM_IO_DUAL_ACCESS = 0x1,
|
||||
NVM_IO_QUAD_ACCESS = 0x2,
|
||||
|
||||
/* NAND Access Modes */
|
||||
NVM_IO_SUSPEND = 0x80,
|
||||
NVM_IO_SLC_MODE = 0x100,
|
||||
NVM_IO_SCRAMBLE_ENABLE = 0x200,
|
||||
|
||||
/* Block Types */
|
||||
NVM_BLK_T_FREE = 0x0,
|
||||
NVM_BLK_T_BAD = 0x1,
|
||||
NVM_BLK_T_GRWN_BAD = 0x2,
|
||||
NVM_BLK_T_DEV = 0x4,
|
||||
NVM_BLK_T_HOST = 0x8,
|
||||
|
||||
/* Memory capabilities */
|
||||
NVM_ID_CAP_SLC = 0x1,
|
||||
NVM_ID_CAP_CMD_SUSPEND = 0x2,
|
||||
NVM_ID_CAP_SCRAMBLE = 0x4,
|
||||
NVM_ID_CAP_ENCRYPT = 0x8,
|
||||
|
||||
/* Memory types */
|
||||
NVM_ID_FMTYPE_SLC = 0,
|
||||
NVM_ID_FMTYPE_MLC = 1,
|
||||
|
||||
/* Device capabilities */
|
||||
NVM_ID_DCAP_BBLKMGMT = 0x1,
|
||||
NVM_UD_DCAP_ECC = 0x2,
|
||||
};
|
||||
|
||||
struct nvm_id_lp_mlc {
|
||||
u16 num_pairs;
|
||||
u8 pairs[886];
|
||||
};
|
||||
|
||||
struct nvm_id_lp_tbl {
|
||||
__u8 id[8];
|
||||
struct nvm_id_lp_mlc mlc;
|
||||
};
|
||||
|
||||
struct nvm_addrf_12 {
|
||||
u8 ch_len;
|
||||
u8 lun_len;
|
||||
u8 blk_len;
|
||||
u8 pg_len;
|
||||
u8 pln_len;
|
||||
u8 sec_len;
|
||||
|
||||
u8 ch_offset;
|
||||
u8 lun_offset;
|
||||
u8 blk_offset;
|
||||
u8 pg_offset;
|
||||
u8 pln_offset;
|
||||
u8 sec_offset;
|
||||
|
||||
u64 ch_mask;
|
||||
u64 lun_mask;
|
||||
u64 blk_mask;
|
||||
u64 pg_mask;
|
||||
u64 pln_mask;
|
||||
u64 sec_mask;
|
||||
};
|
||||
|
||||
struct nvm_addrf {
|
||||
u8 ch_len;
|
||||
u8 lun_len;
|
||||
u8 chk_len;
|
||||
u8 sec_len;
|
||||
u8 rsv_len[2];
|
||||
|
||||
u8 ch_offset;
|
||||
u8 lun_offset;
|
||||
u8 chk_offset;
|
||||
u8 sec_offset;
|
||||
u8 rsv_off[2];
|
||||
|
||||
u64 ch_mask;
|
||||
u64 lun_mask;
|
||||
u64 chk_mask;
|
||||
u64 sec_mask;
|
||||
u64 rsv_mask[2];
|
||||
};
|
||||
|
||||
enum {
|
||||
/* Chunk states */
|
||||
NVM_CHK_ST_FREE = 1 << 0,
|
||||
NVM_CHK_ST_CLOSED = 1 << 1,
|
||||
NVM_CHK_ST_OPEN = 1 << 2,
|
||||
NVM_CHK_ST_OFFLINE = 1 << 3,
|
||||
|
||||
/* Chunk types */
|
||||
NVM_CHK_TP_W_SEQ = 1 << 0,
|
||||
NVM_CHK_TP_W_RAN = 1 << 1,
|
||||
NVM_CHK_TP_SZ_SPEC = 1 << 4,
|
||||
};
|
||||
|
||||
/*
|
||||
* Note: The structure size is linked to nvme_nvm_chk_meta such that the same
|
||||
* buffer can be used when converting from little endian to cpu addressing.
|
||||
*/
|
||||
struct nvm_chk_meta {
|
||||
u8 state;
|
||||
u8 type;
|
||||
u8 wi;
|
||||
u8 rsvd[5];
|
||||
u64 slba;
|
||||
u64 cnlb;
|
||||
u64 wp;
|
||||
};
|
||||
|
||||
struct nvm_target {
|
||||
struct list_head list;
|
||||
struct nvm_tgt_dev *dev;
|
||||
struct nvm_tgt_type *type;
|
||||
struct gendisk *disk;
|
||||
};
|
||||
|
||||
#define ADDR_EMPTY (~0ULL)
|
||||
|
||||
#define NVM_TARGET_DEFAULT_OP (101)
|
||||
#define NVM_TARGET_MIN_OP (3)
|
||||
#define NVM_TARGET_MAX_OP (80)
|
||||
|
||||
#define NVM_VERSION_MAJOR 1
|
||||
#define NVM_VERSION_MINOR 0
|
||||
#define NVM_VERSION_PATCH 0
|
||||
|
||||
#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */
|
||||
|
||||
struct nvm_rq;
|
||||
typedef void (nvm_end_io_fn)(struct nvm_rq *);
|
||||
|
||||
struct nvm_rq {
|
||||
struct nvm_tgt_dev *dev;
|
||||
|
||||
struct bio *bio;
|
||||
|
||||
union {
|
||||
struct ppa_addr ppa_addr;
|
||||
dma_addr_t dma_ppa_list;
|
||||
};
|
||||
|
||||
struct ppa_addr *ppa_list;
|
||||
|
||||
void *meta_list;
|
||||
dma_addr_t dma_meta_list;
|
||||
|
||||
nvm_end_io_fn *end_io;
|
||||
|
||||
uint8_t opcode;
|
||||
uint16_t nr_ppas;
|
||||
uint16_t flags;
|
||||
|
||||
u64 ppa_status; /* ppa media status */
|
||||
int error;
|
||||
|
||||
int is_seq; /* Sequential hint flag. 1.2 only */
|
||||
|
||||
void *private;
|
||||
};
|
||||
|
||||
static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
|
||||
{
|
||||
return pdu - sizeof(struct nvm_rq);
|
||||
}
|
||||
|
||||
static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
|
||||
{
|
||||
return rqdata + 1;
|
||||
}
|
||||
|
||||
static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd)
|
||||
{
|
||||
return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
|
||||
}
|
||||
|
||||
enum {
|
||||
NVM_BLK_ST_FREE = 0x1, /* Free block */
|
||||
NVM_BLK_ST_TGT = 0x2, /* Block in use by target */
|
||||
NVM_BLK_ST_BAD = 0x8, /* Bad block */
|
||||
};
|
||||
|
||||
/* Instance geometry */
|
||||
struct nvm_geo {
|
||||
/* device reported version */
|
||||
u8 major_ver_id;
|
||||
u8 minor_ver_id;
|
||||
|
||||
/* kernel short version */
|
||||
u8 version;
|
||||
|
||||
/* instance specific geometry */
|
||||
int num_ch;
|
||||
int num_lun; /* per channel */
|
||||
|
||||
/* calculated values */
|
||||
int all_luns; /* across channels */
|
||||
int all_chunks; /* across channels */
|
||||
|
||||
int op; /* over-provision in instance */
|
||||
|
||||
sector_t total_secs; /* across channels */
|
||||
|
||||
/* chunk geometry */
|
||||
u32 num_chk; /* chunks per lun */
|
||||
u32 clba; /* sectors per chunk */
|
||||
u16 csecs; /* sector size */
|
||||
u16 sos; /* out-of-band area size */
|
||||
bool ext; /* metadata in extended data buffer */
|
||||
u32 mdts; /* Max data transfer size*/
|
||||
|
||||
/* device write constrains */
|
||||
u32 ws_min; /* minimum write size */
|
||||
u32 ws_opt; /* optimal write size */
|
||||
u32 mw_cunits; /* distance required for successful read */
|
||||
u32 maxoc; /* maximum open chunks */
|
||||
u32 maxocpu; /* maximum open chunks per parallel unit */
|
||||
|
||||
/* device capabilities */
|
||||
u32 mccap;
|
||||
|
||||
/* device timings */
|
||||
u32 trdt; /* Avg. Tread (ns) */
|
||||
u32 trdm; /* Max Tread (ns) */
|
||||
u32 tprt; /* Avg. Tprog (ns) */
|
||||
u32 tprm; /* Max Tprog (ns) */
|
||||
u32 tbet; /* Avg. Terase (ns) */
|
||||
u32 tbem; /* Max Terase (ns) */
|
||||
|
||||
/* generic address format */
|
||||
struct nvm_addrf addrf;
|
||||
|
||||
/* 1.2 compatibility */
|
||||
u8 vmnt;
|
||||
u32 cap;
|
||||
u32 dom;
|
||||
|
||||
u8 mtype;
|
||||
u8 fmtype;
|
||||
|
||||
u16 cpar;
|
||||
u32 mpos;
|
||||
|
||||
u8 num_pln;
|
||||
u8 pln_mode;
|
||||
u16 num_pg;
|
||||
u16 fpg_sz;
|
||||
};
|
||||
|
||||
/* sub-device structure */
struct nvm_tgt_dev {
	/* Device information */
	struct nvm_geo geo;

	/* Base ppas for target LUNs */
	struct ppa_addr *luns;

	struct request_queue *q;

	struct nvm_dev *parent;
	void *map;
};

struct nvm_dev {
	struct nvm_dev_ops *ops;

	struct list_head devices;

	/* Device information */
	struct nvm_geo geo;

	unsigned long *lun_map;
	void *dma_pool;

	/* Backend device */
	struct request_queue *q;
	char name[DISK_NAME_LEN];
	void *private_data;

	struct kref ref;
	void *rmap;

	struct mutex mlock;
	spinlock_t lock;

	/* target management */
	struct list_head area_list;
	struct list_head targets;
};

static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
						  struct ppa_addr r)
{
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr l;

	if (geo->version == NVM_OCSSD_SPEC_12) {
		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;

		l.ppa = ((u64)r.g.ch) << ppaf->ch_offset;
		l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset;
		l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset;
		l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset;
		l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset;
		l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset;
	} else {
		struct nvm_addrf *lbaf = &geo->addrf;

		l.ppa = ((u64)r.m.grp) << lbaf->ch_offset;
		l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset;
		l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset;
		l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset;
	}

	return l;
}

static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
						  struct ppa_addr r)
{
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr l;

	l.ppa = 0;

	if (geo->version == NVM_OCSSD_SPEC_12) {
		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf;

		l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset;
		l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset;
		l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset;
		l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset;
		l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset;
		l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset;
	} else {
		struct nvm_addrf *lbaf = &geo->addrf;

		l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset;
		l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset;
		l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset;
		l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset;
	}

	return l;
}

static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf,
				    struct ppa_addr p)
{
	struct nvm_geo *geo = &dev->geo;
	u64 caddr;

	if (geo->version == NVM_OCSSD_SPEC_12) {
		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf;

		caddr = (u64)p.g.pg << ppaf->pg_offset;
		caddr |= (u64)p.g.pl << ppaf->pln_offset;
		caddr |= (u64)p.g.sec << ppaf->sec_offset;
	} else {
		caddr = p.m.sec;
	}

	return caddr;
}

static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev,
						 void *addrf, u32 ppa32)
{
	struct ppa_addr ppa64;

	ppa64.ppa = 0;

	if (ppa32 == -1) {
		ppa64.ppa = ADDR_EMPTY;
	} else if (ppa32 & (1U << 31)) {
		ppa64.c.line = ppa32 & ((~0U) >> 1);
		ppa64.c.is_cached = 1;
	} else {
		struct nvm_geo *geo = &dev->geo;

		if (geo->version == NVM_OCSSD_SPEC_12) {
			struct nvm_addrf_12 *ppaf = addrf;

			ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> ppaf->ch_offset;
			ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> ppaf->lun_offset;
			ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> ppaf->blk_offset;
			ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> ppaf->pg_offset;
			ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> ppaf->pln_offset;
			ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> ppaf->sec_offset;
		} else {
			struct nvm_addrf *lbaf = addrf;

			ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> lbaf->ch_offset;
			ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> lbaf->lun_offset;
			ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> lbaf->chk_offset;
			ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> lbaf->sec_offset;
		}
	}

	return ppa64;
}

static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev,
				     void *addrf, struct ppa_addr ppa64)
{
	u32 ppa32 = 0;

	if (ppa64.ppa == ADDR_EMPTY) {
		ppa32 = ~0U;
	} else if (ppa64.c.is_cached) {
		ppa32 |= ppa64.c.line;
		ppa32 |= 1U << 31;
	} else {
		struct nvm_geo *geo = &dev->geo;

		if (geo->version == NVM_OCSSD_SPEC_12) {
			struct nvm_addrf_12 *ppaf = addrf;

			ppa32 |= ppa64.g.ch << ppaf->ch_offset;
			ppa32 |= ppa64.g.lun << ppaf->lun_offset;
			ppa32 |= ppa64.g.blk << ppaf->blk_offset;
			ppa32 |= ppa64.g.pg << ppaf->pg_offset;
			ppa32 |= ppa64.g.pl << ppaf->pln_offset;
			ppa32 |= ppa64.g.sec << ppaf->sec_offset;
		} else {
			struct nvm_addrf *lbaf = addrf;

			ppa32 |= ppa64.m.grp << lbaf->ch_offset;
			ppa32 |= ppa64.m.pu << lbaf->lun_offset;
			ppa32 |= ppa64.m.chk << lbaf->chk_offset;
			ppa32 |= ppa64.m.sec << lbaf->sec_offset;
		}
	}

	return ppa32;
}
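For reference, the 32-bit encoding the two conversion helpers above agree on: ~0U means "empty", bit 31 marks a write-cache line entry with the line number in the low bits, and anything else is the device address packed per the address format. A small sketch, assuming only a valid struct nvm_dev pointer:

#include <linux/lightnvm.h>

static void my_ppa32_example(struct nvm_dev *dev)
{
	u32 cached = (1U << 31) | 123;	/* cache line 123, bit 31 set */
	struct ppa_addr p;

	p = nvm_ppa32_to_ppa64(dev, &dev->geo.addrf, cached);
	/* now p.c.is_cached == 1 and p.c.line == 123 */

	/* packing it again restores the original 32-bit value */
	cached = nvm_ppa64_to_ppa32(dev, &dev->geo.addrf, p);
}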
static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
				      struct ppa_addr *ppa)
{
	struct nvm_geo *geo = &dev->geo;
	int last = 0;

	if (geo->version == NVM_OCSSD_SPEC_12) {
		int sec = ppa->g.sec;

		sec++;
		if (sec == geo->ws_min) {
			int pg = ppa->g.pg;

			sec = 0;
			pg++;
			if (pg == geo->num_pg) {
				int pl = ppa->g.pl;

				pg = 0;
				pl++;
				if (pl == geo->num_pln)
					last = 1;

				ppa->g.pl = pl;
			}
			ppa->g.pg = pg;
		}
		ppa->g.sec = sec;
	} else {
		ppa->m.sec++;
		if (ppa->m.sec == geo->clba)
			last = 1;
	}

	return last;
}
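A short sketch of how a target might use nvm_next_ppa_in_chk() above to visit every sector of a chunk; illustration only, not taken from the patch, and the starting ppa is assumed to address the first sector of the chunk:

#include <linux/lightnvm.h>

static void my_walk_chunk(struct nvm_tgt_dev *dev, struct ppa_addr ppa)
{
	int last = 0;

	while (!last) {
		/* process 'ppa' here, e.g. append it to rqd->ppa_list */
		last = nvm_next_ppa_in_chk(dev, &ppa);	/* returns 1 past the last sector */
	}
}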
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
				int flags);
typedef void (nvm_tgt_exit_fn)(void *, bool);
typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);

enum {
	NVM_TGT_F_DEV_L2P = 0,
	NVM_TGT_F_HOST_L2P = 1 << 0,
};

struct nvm_tgt_type {
	const char *name;
	unsigned int version[3];
	int flags;

	/* target entry points */
	const struct block_device_operations *bops;
	nvm_tgt_capacity_fn *capacity;

	/* module-specific init/teardown */
	nvm_tgt_init_fn *init;
	nvm_tgt_exit_fn *exit;

	/* sysfs */
	nvm_tgt_sysfs_init_fn *sysfs_init;
	nvm_tgt_sysfs_exit_fn *sysfs_exit;

	/* For internal use */
	struct list_head list;
	struct module *owner;
};

extern int nvm_register_tgt_type(struct nvm_tgt_type *);
extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);

extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);

extern struct nvm_dev *nvm_alloc_dev(int);
extern int nvm_register(struct nvm_dev *);
extern void nvm_unregister(struct nvm_dev *);

extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr,
			      int, struct nvm_chk_meta *);
extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *,
			      int, int);
extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *, void *);
extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *, void *);
extern void nvm_end_io(struct nvm_rq *);

#else /* CONFIG_NVM */
struct nvm_dev_ops;

static inline struct nvm_dev *nvm_alloc_dev(int node)
{
	return ERR_PTR(-EINVAL);
}
static inline int nvm_register(struct nvm_dev *dev)
{
	return -EINVAL;
}
static inline void nvm_unregister(struct nvm_dev *dev) {}
#endif /* CONFIG_NVM */
#endif /* LIGHTNVM.H */
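Taken together, the declarations above were the whole target-side contract of the subsystem being removed. A hypothetical skeleton of a LightNVM target module built on them; every my_* and tt_my name is invented, error handling is trimmed, and a real target would also fill .bops and scale the capacity to 512-byte sectors while reserving over-provision space:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/lightnvm.h>

struct my_tgt {
	struct nvm_tgt_dev *dev;
};

static void *my_tgt_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
			 int flags)
{
	struct my_tgt *t;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return ERR_PTR(-ENOMEM);

	t->dev = dev;		/* instance geometry is available in dev->geo */
	return t;
}

static void my_tgt_exit(void *private, bool graceful)
{
	kfree(private);
}

static sector_t my_tgt_capacity(void *private)
{
	struct my_tgt *t = private;

	/* placeholder: derive the exported capacity from t->dev->geo here */
	return t->dev->geo.total_secs;
}

static struct nvm_tgt_type tt_my = {
	.name		= "mytgt",
	.version	= {1, 0, 0},
	.flags		= NVM_TGT_F_HOST_L2P,
	.capacity	= my_tgt_capacity,
	.init		= my_tgt_init,
	.exit		= my_tgt_exit,
	.owner		= THIS_MODULE,
};

static int __init my_tgt_module_init(void)
{
	return nvm_register_tgt_type(&tt_my);
}

static void __exit my_tgt_module_exit(void)
{
	nvm_unregister_tgt_type(&tt_my);
}

module_init(my_tgt_module_init);
module_exit(my_tgt_module_exit);
MODULE_LICENSE("GPL");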
@ -431,6 +431,8 @@ extern int param_get_int(char *buffer, const struct kernel_param *kp);
extern const struct kernel_param_ops param_ops_uint;
extern int param_set_uint(const char *val, const struct kernel_param *kp);
extern int param_get_uint(char *buffer, const struct kernel_param *kp);
int param_set_uint_minmax(const char *val, const struct kernel_param *kp,
		unsigned int min, unsigned int max);
#define param_check_uint(name, p) __param_check(name, p, unsigned int)

extern const struct kernel_param_ops param_ops_long;
@ -1,224 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * Copyright (C) 2015 CNEX Labs. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 */

#ifndef _UAPI_LINUX_LIGHTNVM_H
#define _UAPI_LINUX_LIGHTNVM_H

#ifdef __KERNEL__
#include <linux/const.h>
#else /* __KERNEL__ */
#include <stdio.h>
#include <sys/ioctl.h>
#define DISK_NAME_LEN 32
#endif /* __KERNEL__ */

#include <linux/types.h>
#include <linux/ioctl.h>

#define NVM_TTYPE_NAME_MAX 48
#define NVM_TTYPE_MAX 63
#define NVM_MMTYPE_LEN 8

#define NVM_CTRL_FILE "/dev/lightnvm/control"

struct nvm_ioctl_info_tgt {
	__u32 version[3];
	__u32 reserved;
	char tgtname[NVM_TTYPE_NAME_MAX];
};

struct nvm_ioctl_info {
	__u32 version[3];	/* in/out - major, minor, patch */
	__u16 tgtsize;		/* number of targets */
	__u16 reserved16;	/* pad to 4K page */
	__u32 reserved[12];
	struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
};

enum {
	NVM_DEVICE_ACTIVE = 1 << 0,
};

struct nvm_ioctl_device_info {
	char devname[DISK_NAME_LEN];
	char bmname[NVM_TTYPE_NAME_MAX];
	__u32 bmversion[3];
	__u32 flags;
	__u32 reserved[8];
};

struct nvm_ioctl_get_devices {
	__u32 nr_devices;
	__u32 reserved[31];
	struct nvm_ioctl_device_info info[31];
};

struct nvm_ioctl_create_simple {
	__u32 lun_begin;
	__u32 lun_end;
};

struct nvm_ioctl_create_extended {
	__u16 lun_begin;
	__u16 lun_end;
	__u16 op;
	__u16 rsv;
};

enum {
	NVM_CONFIG_TYPE_SIMPLE = 0,
	NVM_CONFIG_TYPE_EXTENDED = 1,
};

struct nvm_ioctl_create_conf {
	__u32 type;
	union {
		struct nvm_ioctl_create_simple s;
		struct nvm_ioctl_create_extended e;
	};
};

enum {
	NVM_TARGET_FACTORY = 1 << 0,	/* Init target in factory mode */
};

struct nvm_ioctl_create {
	char dev[DISK_NAME_LEN];		/* open-channel SSD device */
	char tgttype[NVM_TTYPE_NAME_MAX];	/* target type name */
	char tgtname[DISK_NAME_LEN];		/* dev to expose target as */

	__u32 flags;

	struct nvm_ioctl_create_conf conf;
};

struct nvm_ioctl_remove {
	char tgtname[DISK_NAME_LEN];

	__u32 flags;
};

struct nvm_ioctl_dev_init {
	char dev[DISK_NAME_LEN];	/* open-channel SSD device */
	char mmtype[NVM_MMTYPE_LEN];	/* register to media manager */

	__u32 flags;
};

enum {
	NVM_FACTORY_ERASE_ONLY_USER = 1 << 0,	/* erase only blocks used as
						 * host blks or grown blks */
	NVM_FACTORY_RESET_HOST_BLKS = 1 << 1,	/* remove host blk marks */
	NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2,	/* remove grown blk marks */
	NVM_FACTORY_NR_BITS = 1 << 3,		/* stops here */
};

struct nvm_ioctl_dev_factory {
	char dev[DISK_NAME_LEN];

	__u32 flags;
};

struct nvm_user_vio {
	__u8 opcode;
	__u8 flags;
	__u16 control;
	__u16 nppas;
	__u16 rsvd;
	__u64 metadata;
	__u64 addr;
	__u64 ppa_list;
	__u32 metadata_len;
	__u32 data_len;
	__u64 status;
	__u32 result;
	__u32 rsvd3[3];
};

struct nvm_passthru_vio {
	__u8 opcode;
	__u8 flags;
	__u8 rsvd[2];
	__u32 nsid;
	__u32 cdw2;
	__u32 cdw3;
	__u64 metadata;
	__u64 addr;
	__u32 metadata_len;
	__u32 data_len;
	__u64 ppa_list;
	__u16 nppas;
	__u16 control;
	__u32 cdw13;
	__u32 cdw14;
	__u32 cdw15;
	__u64 status;
	__u32 result;
	__u32 timeout_ms;
};

/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
enum {
	/* top level cmds */
	NVM_INFO_CMD = 0x20,
	NVM_GET_DEVICES_CMD,

	/* device level cmds */
	NVM_DEV_CREATE_CMD,
	NVM_DEV_REMOVE_CMD,

	/* Init a device to support LightNVM media managers */
	NVM_DEV_INIT_CMD,

	/* Factory reset device */
	NVM_DEV_FACTORY_CMD,

	/* Vector user I/O */
	NVM_DEV_VIO_ADMIN_CMD = 0x41,
	NVM_DEV_VIO_CMD = 0x42,
	NVM_DEV_VIO_USER_CMD = 0x43,
};

#define NVM_IOCTL 'L' /* 0x4c */

#define NVM_INFO		_IOWR(NVM_IOCTL, NVM_INFO_CMD, \
						struct nvm_ioctl_info)
#define NVM_GET_DEVICES		_IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
						struct nvm_ioctl_get_devices)
#define NVM_DEV_CREATE		_IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
						struct nvm_ioctl_create)
#define NVM_DEV_REMOVE		_IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
						struct nvm_ioctl_remove)
#define NVM_DEV_INIT		_IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
						struct nvm_ioctl_dev_init)
#define NVM_DEV_FACTORY		_IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
						struct nvm_ioctl_dev_factory)

#define NVME_NVM_IOCTL_IO_VIO		_IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
						struct nvm_passthru_vio)
#define NVME_NVM_IOCTL_ADMIN_VIO	_IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD, \
						struct nvm_passthru_vio)
#define NVME_NVM_IOCTL_SUBMIT_VIO	_IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD, \
						struct nvm_user_vio)

#define NVM_VERSION_MAJOR	1
#define NVM_VERSION_MINOR	0
#define NVM_VERSION_PATCHLEVEL	0

#endif
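For completeness, this is roughly how userspace talked to the subsystem through the UAPI removed above, e.g. listing the registered target types via the control node. A sketch, assuming the header is still installed as <linux/lightnvm.h>:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/lightnvm.h>

int main(void)
{
	struct nvm_ioctl_info info;
	int fd, i;

	fd = open(NVM_CTRL_FILE, O_RDWR);
	if (fd < 0) {
		perror("open " NVM_CTRL_FILE);
		return 1;
	}

	memset(&info, 0, sizeof(info));
	if (ioctl(fd, NVM_INFO, &info) < 0) {
		perror("NVM_INFO");
		close(fd);
		return 1;
	}

	printf("lightnvm %u.%u.%u, %u target type(s)\n",
	       info.version[0], info.version[1], info.version[2],
	       info.tgtsize);
	for (i = 0; i < info.tgtsize; i++)
		printf("  %s %u.%u.%u\n", info.tgts[i].tgtname,
		       info.tgts[i].version[0], info.tgts[i].version[1],
		       info.tgts[i].version[2]);

	close(fd);
	return 0;
}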
@ -243,6 +243,24 @@ STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul);
STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull);
STANDARD_PARAM_DEF(hexint, unsigned int, "%#08x", kstrtouint);

int param_set_uint_minmax(const char *val, const struct kernel_param *kp,
		unsigned int min, unsigned int max)
{
	unsigned int num;
	int ret;

	if (!val)
		return -EINVAL;
	ret = kstrtouint(val, 0, &num);
	if (ret)
		return ret;
	if (num < min || num > max)
		return -EINVAL;
	*((unsigned int *)kp->arg) = num;
	return 0;
}
EXPORT_SYMBOL_GPL(param_set_uint_minmax);

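The newly exported helper is meant for kernel_param_ops setters that want range checking, which is exactly how the sunrpc hunk below now uses it for param_set_portnr. A usage sketch; the parameter name and bounds here are invented for illustration:

#include <linux/moduleparam.h>

static unsigned int my_queue_depth = 64;

/* Reject anything outside 1..1024 before it reaches the variable. */
static int my_set_queue_depth(const char *val, const struct kernel_param *kp)
{
	return param_set_uint_minmax(val, kp, 1, 1024);
}

static const struct kernel_param_ops my_queue_depth_ops = {
	.set = my_set_queue_depth,
	.get = param_get_uint,
};

module_param_cb(queue_depth, &my_queue_depth_ops, &my_queue_depth, 0644);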
int param_set_charp(const char *val, const struct kernel_param *kp)
{
	if (strlen(val) > 1024) {
@ -3149,24 +3149,6 @@ void cleanup_socket_xprt(void)
	xprt_unregister_transport(&xs_bc_tcp_transport);
}

static int param_set_uint_minmax(const char *val,
		const struct kernel_param *kp,
		unsigned int min, unsigned int max)
{
	unsigned int num;
	int ret;

	if (!val)
		return -EINVAL;
	ret = kstrtouint(val, 0, &num);
	if (ret)
		return ret;
	if (num < min || num > max)
		return -EINVAL;
	*((unsigned int *)kp->arg) = num;
	return 0;
}

static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
	return param_set_uint_minmax(val, kp,