linux/drivers/s390/block/dcssblk.c

1033 lines
25 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* dcssblk.c -- the S/390 block driver for dcss memory
*
* Authors: Carsten Otte, Stefan Weinhuber, Gerald Schaefer
*/
#define KMSG_COMPONENT "dcssblk"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/pfn_t.h>
#include <linux/sysfs.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/io.h>
#include <asm/extmem.h>
/* Driver name string. */
#define DCSSBLK_NAME "dcssblk"
/* Number of minors per gendisk; stored in gd->minors by dcssblk_add_store(). */
#define DCSSBLK_MINORS_PER_DISK 1
/* Size of the dcssblk_segments buffer below. */
#define DCSSBLK_PARM_LEN 400
/* Size of the segment_name buffers in the device and segment structures. */
#define DCSS_BUS_ID_SIZE 20
/* Forward declarations of the block device and DAX callbacks defined later. */
static int dcssblk_open(struct gendisk *disk, blk_mode_t mode);
static void dcssblk_release(struct gendisk *disk);
static void dcssblk_submit_bio(struct bio *bio);
static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
long nr_pages, enum dax_access_mode mode, void **kaddr,
pfn_t *pfn);
/* Starts out as an empty string; presumably backs a module parameter -- TODO confirm. */
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
/* Major number copied into gd->major for every disk created by this driver. */
static int dcssblk_major;
/*
 * Block device operations installed as gd->fops for every dcssblk disk
 * (see dcssblk_add_store()).
 */
static const struct block_device_operations dcssblk_devops = {
.owner = THIS_MODULE,
.submit_bio = dcssblk_submit_bio,
.open = dcssblk_open,
.release = dcssblk_release,
};
/*
 * zero_page_range callback of dcssblk_dax_ops: clear nr_pages pages of the
 * DAX device, starting at page offset pgoff, and flush the cleared range.
 *
 * Returns 0 on success, or the dax_direct_access() error translated with
 * dax_mem2blk_err().
 */
static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
				       pgoff_t pgoff, size_t nr_pages)
{
	size_t len = nr_pages << PAGE_SHIFT;
	void *kaddr;
	long avail;

	avail = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
				  &kaddr, NULL);
	if (avail < 0)
		return dax_mem2blk_err(avail);

	memset(kaddr, 0, len);
	dax_flush(dax_dev, kaddr, len);
	return 0;
}
/* DAX operations passed to alloc_dax() in dcssblk_add_store(). */
static const struct dax_operations dcssblk_dax_ops = {
.direct_access = dcssblk_dax_direct_access,
.zero_page_range = dcssblk_dax_zero_page_range,
};
/*
 * Per-device state of one dcssblk block device, which maps one or more
 * DCSS segments (kept on seg_list).
 */
struct dcssblk_dev_info {
/* entry in the global dcssblk_devices list */
struct list_head lh;
/* embedded device; its release callback frees this structure */
struct device dev;
/* name of the first segment given on "add"; also used as the device name */
char segment_name[DCSS_BUS_ID_SIZE];
/* incremented in dcssblk_open(), decremented in dcssblk_release() */
atomic_t use_count;
struct gendisk *gd;
/* lowest start address over all segments on seg_list */
unsigned long start;
/* highest end address over all segments on seg_list */
unsigned long end;
/* segment type of the first segment given on "add" */
int segment_type;
/* set when a save was requested while the device was in use */
unsigned char save_pending;
/* 1 while the segments are in shared mode, 0 in exclusive mode */
unsigned char is_shared;
/* number of entries on seg_list */
int num_of_segments;
/* list of struct segment_info, linked through segment_info.lh */
struct list_head seg_list;
/* DAX device allocated with alloc_dax() in dcssblk_add_store() */
struct dax_device *dax_dev;
};
/* One loaded DCSS segment that belongs to a dcssblk device. */
struct segment_info {
/* entry in dcssblk_dev_info.seg_list */
struct list_head lh;
/* segment name as passed to segment_load() */
char segment_name[DCSS_BUS_ID_SIZE];
/* start address filled in by segment_load() */
unsigned long start;
/* end address filled in by segment_load() */
unsigned long end;
/* non-negative return value of segment_load() */
int segment_type;
};
/* Store handlers of the write-only "add" and "remove" attributes. */
static ssize_t dcssblk_add_store(struct device * dev, struct device_attribute *attr, const char * buf,
size_t count);
static ssize_t dcssblk_remove_store(struct device * dev, struct device_attribute *attr, const char * buf,
size_t count);
static DEVICE_ATTR(add, S_IWUSR, NULL, dcssblk_add_store);
static DEVICE_ATTR(remove, S_IWUSR, NULL, dcssblk_remove_store);
/* Parent of all dcssblk devices; the add/remove handlers reject any other device. */
static struct device *dcssblk_root_dev;
/* List of all dcssblk_dev_info, linked through dcssblk_dev_info.lh. */
static LIST_HEAD(dcssblk_devices);
/* Taken around accesses to dcssblk_devices and the per-device segment lists. */
static struct rw_semaphore dcssblk_devices_sem;
/*
 * Release callback of a dcssblk device (installed as dev.release in
 * dcssblk_add_store()): free every segment_info on the segment list,
 * free the device structure itself and drop one module reference.
 */
static void dcssblk_release_segment(struct device *dev)
{
	struct dcssblk_dev_info *dev_info =
		container_of(dev, struct dcssblk_dev_info, dev);
	struct segment_info *seg, *next;

	list_for_each_entry_safe(seg, next, &dev_info->seg_list, lh) {
		list_del(&seg->lh);
		kfree(seg);
	}
	kfree(dev_info);
	module_put(THIS_MODULE);
}
/*
* get a minor number. needs to be called with
* down_write(&dcssblk_devices_sem) and the
* device needs to be enqueued before the semaphore is
* freed.
*/
static int
dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info)
{
int minor, found;
struct dcssblk_dev_info *entry;
if (dev_info == NULL)
return -EINVAL;
for (minor = 0; minor < (1<<MINORBITS); minor++) {
found = 0;
// test if minor available
list_for_each_entry(entry, &dcssblk_devices, lh)
if (minor == entry->gd->first_minor)
found++;
if (!found) break; // got unused minor
}
if (found)
return -EBUSY;
dev_info->gd->first_minor = minor;
return 0;
}
/*
* get the struct dcssblk_dev_info from dcssblk_devices
* for the given name.
* down_read(&dcssblk_devices_sem) must be held.
*/
static struct dcssblk_dev_info *
dcssblk_get_device_by_name(char *name)
{
struct dcssblk_dev_info *entry;
list_for_each_entry(entry, &dcssblk_devices, lh) {
if (!strcmp(name, entry->segment_name)) {
return entry;
}
}
return NULL;
}
/*
* get the struct segment_info from seg_list
* for the given name.
* down_read(&dcssblk_devices_sem) must be held.
*/
static struct segment_info *
dcssblk_get_segment_by_name(char *name)
{
struct dcssblk_dev_info *dev_info;
struct segment_info *entry;
list_for_each_entry(dev_info, &dcssblk_devices, lh) {
list_for_each_entry(entry, &dev_info->seg_list, lh) {
if (!strcmp(name, entry->segment_name))
return entry;
}
}
return NULL;
}
/*
* get the highest address of the multi-segment block.
*/
static unsigned long
dcssblk_find_highest_addr(struct dcssblk_dev_info *dev_info)
{
unsigned long highest_addr;
struct segment_info *entry;
highest_addr = 0;
list_for_each_entry(entry, &dev_info->seg_list, lh) {
if (highest_addr < entry->end)
highest_addr = entry->end;
}
return highest_addr;
}
/*
* get the lowest address of the multi-segment block.
*/
static unsigned long
dcssblk_find_lowest_addr(struct dcssblk_dev_info *dev_info)
{
int set_first;
unsigned long lowest_addr;
struct segment_info *entry;
set_first = 0;
lowest_addr = 0;
list_for_each_entry(entry, &dev_info->seg_list, lh) {
if (set_first == 0) {
lowest_addr = entry->start;
set_first = 1;
} else {
if (lowest_addr > entry->start)
lowest_addr = entry->start;
}
}
return lowest_addr;
}
/*
* Check continuity of segments.
*/
static int
dcssblk_is_continuous(struct dcssblk_dev_info *dev_info)
{
int i, j, rc;
struct segment_info *sort_list, *entry, temp;
if (dev_info->num_of_segments <= 1)
return 0;
treewide: kzalloc() -> kcalloc() The kzalloc() function has a 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a * b, gfp) as well as handling cases of: kzalloc(a * b * c, gfp) with: kzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kzalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(char) * COUNT + COUNT , ...) | kzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) 
| - kzalloc + kcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kzalloc + kcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) 
| kzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kzalloc(C1 * C2 * C3, ...) | kzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kzalloc(sizeof(THING) * C2, ...) | kzalloc(sizeof(TYPE) * C2, ...) | kzalloc(C1 * C2 * C3, ...) | kzalloc(C1 * C2, ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) 
| - kzalloc + kcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - (E1) * E2 + E1, E2 , ...) | - kzalloc + kcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kzalloc + kcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 05:03:40 +08:00
sort_list = kcalloc(dev_info->num_of_segments,
sizeof(struct segment_info),
GFP_KERNEL);
if (sort_list == NULL)
return -ENOMEM;
i = 0;
list_for_each_entry(entry, &dev_info->seg_list, lh) {
memcpy(&sort_list[i], entry, sizeof(struct segment_info));
i++;
}
/* sort segments */
for (i = 0; i < dev_info->num_of_segments; i++)
for (j = 0; j < dev_info->num_of_segments; j++)
if (sort_list[j].start > sort_list[i].start) {
memcpy(&temp, &sort_list[i],
sizeof(struct segment_info));
memcpy(&sort_list[i], &sort_list[j],
sizeof(struct segment_info));
memcpy(&sort_list[j], &temp,
sizeof(struct segment_info));
}
/* check continuity */
for (i = 0; i < dev_info->num_of_segments - 1; i++) {
if ((sort_list[i].end + 1) != sort_list[i+1].start) {
pr_err("Adjacent DCSSs %s and %s are not "
"contiguous\n", sort_list[i].segment_name,
sort_list[i+1].segment_name);
rc = -EINVAL;
goto out;
}
/* EN and EW are allowed in a block device */
if (sort_list[i].segment_type != sort_list[i+1].segment_type) {
if (!(sort_list[i].segment_type & SEGMENT_EXCLUSIVE) ||
(sort_list[i].segment_type == SEG_TYPE_ER) ||
!(sort_list[i+1].segment_type &
SEGMENT_EXCLUSIVE) ||
(sort_list[i+1].segment_type == SEG_TYPE_ER)) {
pr_err("DCSS %s and DCSS %s have "
"incompatible types\n",
sort_list[i].segment_name,
sort_list[i+1].segment_name);
rc = -EINVAL;
goto out;
}
}
}
rc = 0;
out:
kfree(sort_list);
return rc;
}
/*
 * Load a segment
 *
 * Allocates a segment_info for @name and loads the segment in shared mode.
 *
 * On success the new, not yet enqueued segment_info is stored in *seg_info
 * and the non-negative return value of segment_load() (the segment type)
 * is returned.
 *
 * On failure a negative error code is returned:
 *  -EEXIST  a segment of that name is already on some device's list;
 *           *seg_info then points to that existing entry, which the
 *           caller does not own,
 *  -ENOMEM  allocation failed, *seg_info is NULL,
 *  other    segment_load() failed, *seg_info is NULL (the temporary
 *           structure has been freed and is never handed out).
 */
static int
dcssblk_load_segment(char *name, struct segment_info **seg_info)
{
	struct segment_info *seg;
	int rc;

	/* already loaded? */
	down_read(&dcssblk_devices_sem);
	*seg_info = dcssblk_get_segment_by_name(name);
	up_read(&dcssblk_devices_sem);
	if (*seg_info != NULL)
		return -EEXIST;

	/* get a struct segment_info */
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (seg == NULL)
		return -ENOMEM;

	/* bounded copy: never write past the name buffer */
	strscpy(seg->segment_name, name, sizeof(seg->segment_name));

	/* load the segment */
	rc = segment_load(name, SEGMENT_SHARED, &seg->start, &seg->end);
	if (rc < 0) {
		segment_warning(rc, seg->segment_name);
		kfree(seg);
		/* *seg_info is still NULL here, so no dangling pointer escapes */
		return rc;
	}

	INIT_LIST_HEAD(&seg->lh);
	seg->segment_type = rc;
	*seg_info = seg;
	return rc;
}
/*
 * device attribute for switching shared/nonshared (exclusive)
 * operation (show + store)
 */

/*
 * Show handler of the "shared" attribute: emits "1\n" while the device is
 * in shared mode and "0\n" otherwise. Returns the number of bytes written.
 */
static ssize_t
dcssblk_shared_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct dcssblk_dev_info *dev_info;

	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
	/* sysfs_emit() with a literal format instead of sprintf(buf, <expr>) */
	return sysfs_emit(buf, "%d\n", dev_info->is_shared ? 1 : 0);
}
/*
 * Store handler of the "shared" attribute.
 *
 * "1" switches all segments of the device to shared mode, "0" to exclusive
 * mode. Only the first character is evaluated; a second character other
 * than newline or NUL is rejected.
 *
 * Returns count on success, -EINVAL for malformed input or when a
 * SEG_TYPE_SC device is switched to exclusive mode, -EBUSY while the
 * device is open. If segment_modify_shared() fails with anything other
 * than -EAGAIN the whole device is removed and that error is returned.
 */
static ssize_t
dcssblk_shared_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count)
{
	struct dcssblk_dev_info *dev_info;
	struct segment_info *entry, *temp;
	int rc;

	if ((count > 1) && (inbuf[1] != '\n') && (inbuf[1] != '\0'))
		return -EINVAL;
	down_write(&dcssblk_devices_sem);
	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
	if (atomic_read(&dev_info->use_count)) {
		rc = -EBUSY;
		goto out;
	}
	if (inbuf[0] == '1') {
		/* reload segments in shared mode */
		list_for_each_entry(entry, &dev_info->seg_list, lh) {
			rc = segment_modify_shared(entry->segment_name,
						   SEGMENT_SHARED);
			if (rc < 0) {
				BUG_ON(rc == -EINVAL);
				if (rc != -EAGAIN)
					goto removeseg;
			}
		}
		dev_info->is_shared = 1;
		switch (dev_info->segment_type) {
		case SEG_TYPE_SR:
		case SEG_TYPE_ER:
		case SEG_TYPE_SC:
			set_disk_ro(dev_info->gd, 1);
			break;
		default:
			break;
		}
	} else if (inbuf[0] == '0') {
		/* reload segments in exclusive mode */
		if (dev_info->segment_type == SEG_TYPE_SC) {
			pr_err("DCSS %s is of type SC and cannot be "
			       "loaded as exclusive-writable\n",
			       dev_info->segment_name);
			rc = -EINVAL;
			goto out;
		}
		list_for_each_entry(entry, &dev_info->seg_list, lh) {
			rc = segment_modify_shared(entry->segment_name,
						   SEGMENT_EXCLUSIVE);
			if (rc < 0) {
				BUG_ON(rc == -EINVAL);
				if (rc != -EAGAIN)
					goto removeseg;
			}
		}
		dev_info->is_shared = 0;
		set_disk_ro(dev_info->gd, 0);
	} else {
		rc = -EINVAL;
		goto out;
	}
	rc = count;
	goto out;

removeseg:
	pr_err("DCSS device %s is removed after a failed access mode "
	       "change\n", dev_info->segment_name);
	/*
	 * Unload every segment except the one whose mode change failed
	 * (presumably that one is already gone -- TODO confirm against
	 * segment_modify_shared()).
	 */
	temp = entry;
	list_for_each_entry(entry, &dev_info->seg_list, lh) {
		if (entry != temp)
			segment_unload(entry->segment_name);
	}
	list_del(&dev_info->lh);
	/*
	 * Drop the semaphore before del_gendisk(): dcssblk_release() takes
	 * dcssblk_devices_sem too, and holding it across del_gendisk()
	 * triggered a lockdep warning.
	 */
	up_write(&dcssblk_devices_sem);

	dax_remove_host(dev_info->gd);
	kill_dax(dev_info->dax_dev);
	put_dax(dev_info->dax_dev);
	del_gendisk(dev_info->gd);
	put_disk(dev_info->gd);

	if (device_remove_file_self(dev, attr)) {
		device_unregister(dev);
		put_device(dev);
	}
	return rc;
out:
	up_write(&dcssblk_devices_sem);
	return rc;
}
/* "shared" attribute: readable and writable by the owner only. */
static DEVICE_ATTR(shared, S_IWUSR | S_IRUSR, dcssblk_shared_show,
dcssblk_shared_store);
/*
 * device attribute for save operation on current copy
 * of the segment. If the segment is busy, saving will
 * become pending until it gets released, which can be
 * undone by storing a non-true value to this entry.
 * (show + store)
 */

/*
 * Show handler of the "save" attribute: emits "1\n" while a save request
 * is pending and "0\n" otherwise. Returns the number of bytes written.
 */
static ssize_t
dcssblk_save_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct dcssblk_dev_info *dev_info;

	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
	/* sysfs_emit() with a literal format instead of sprintf(buf, <expr>) */
	return sysfs_emit(buf, "%d\n", dev_info->save_pending ? 1 : 0);
}
/*
 * Store handler of the "save" attribute.
 *
 * "1": save all segments right away if the device is idle, otherwise mark
 *      the save as pending so dcssblk_release() performs it later.
 * "0": cancel a pending save request, if any.
 *
 * Only the first character is evaluated; a second character other than
 * newline or NUL is rejected. Returns count on success, -EINVAL otherwise.
 */
static ssize_t
dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char *inbuf, size_t count)
{
	struct dcssblk_dev_info *dev_info;
	struct segment_info *seg;
	ssize_t ret = count;

	if (count > 1 && inbuf[1] != '\n' && inbuf[1] != '\0')
		return -EINVAL;

	dev_info = container_of(dev, struct dcssblk_dev_info, dev);
	down_write(&dcssblk_devices_sem);

	switch (inbuf[0]) {
	case '1':
		if (atomic_read(&dev_info->use_count) != 0) {
			/* busy: defer the save to dcssblk_release() */
			pr_info("Device %s is in use, its DCSSs will be "
				"saved when it becomes idle\n",
				dev_info->segment_name);
			dev_info->save_pending = 1;
			break;
		}
		/* idle: save immediately */
		pr_info("All DCSSs that map to device %s are "
			"saved\n", dev_info->segment_name);
		list_for_each_entry(seg, &dev_info->seg_list, lh) {
			if (seg->segment_type == SEG_TYPE_EN ||
			    seg->segment_type == SEG_TYPE_SN) {
				pr_warn("DCSS %s is of type SN or EN"
					" and cannot be saved\n",
					seg->segment_name);
				continue;
			}
			segment_save(seg->segment_name);
		}
		break;
	case '0':
		if (dev_info->save_pending) {
			/* undo an earlier deferred save request */
			dev_info->save_pending = 0;
			pr_info("A pending save request for device %s "
				"has been canceled\n",
				dev_info->segment_name);
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}

	up_write(&dcssblk_devices_sem);
	return ret;
}
/* "save" attribute: readable and writable by the owner only. */
static DEVICE_ATTR(save, S_IWUSR | S_IRUSR, dcssblk_save_show,
dcssblk_save_store);
/*
* device attribute for showing all segments in a device
*/
static ssize_t
dcssblk_seglist_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
int i;
struct dcssblk_dev_info *dev_info;
struct segment_info *entry;
down_read(&dcssblk_devices_sem);
dev_info = container_of(dev, struct dcssblk_dev_info, dev);
i = 0;
buf[0] = '\0';
list_for_each_entry(entry, &dev_info->seg_list, lh) {
strcpy(&buf[i], entry->segment_name);
i += strlen(entry->segment_name);
buf[i] = '\n';
i++;
}
up_read(&dcssblk_devices_sem);
return i;
}
/* "seglist" attribute: read-only, owner only. */
static DEVICE_ATTR(seglist, S_IRUSR, dcssblk_seglist_show, NULL);
/* Per-device attributes: shared, save and seglist. */
static struct attribute *dcssblk_dev_attrs[] = {
&dev_attr_shared.attr,
&dev_attr_save.attr,
&dev_attr_seglist.attr,
NULL,
};
static struct attribute_group dcssblk_dev_attr_group = {
.attrs = dcssblk_dev_attrs,
};
/* Installed as dev.groups for every device in dcssblk_add_store(). */
static const struct attribute_group *dcssblk_dev_attr_groups[] = {
&dcssblk_dev_attr_group,
NULL,
};
/*
 * device attribute for adding devices
 *
 * Input is a colon separated list of segment names (1 to 8 characters
 * each, converted to upper case), terminated by NUL, newline or the end
 * of the buffer. Every segment is loaded; together they have to form one
 * contiguous range with compatible types. A gendisk and a DAX device are
 * then created on top of that range, and the new device is added to
 * dcssblk_devices under the name of the first segment.
 *
 * Returns count on success or a negative error code; on failure all
 * segments loaded by this call are unloaded again.
 */
static ssize_t
dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
	int rc, i, j, num_of_segments;
	struct dcssblk_dev_info *dev_info;
	struct segment_info *seg_info, *temp;
	char *local_buf;
	unsigned long seg_byte_size;

	dev_info = NULL;
	seg_info = NULL;
	if (dev != dcssblk_root_dev) {
		rc = -EINVAL;
		goto out_nobuf;
	}
	if ((count < 1) || (buf[0] == '\0') || (buf[0] == '\n')) {
		rc = -ENAMETOOLONG;
		goto out_nobuf;
	}

	local_buf = kmalloc(count + 1, GFP_KERNEL);
	if (local_buf == NULL) {
		rc = -ENOMEM;
		goto out_nobuf;
	}

	/*
	 * parse input
	 */
	num_of_segments = 0;
	for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
		/* copy the next name, upper-cased, up to ':' or end of input */
		for (j = i; j < count &&
			(buf[j] != ':') &&
			(buf[j] != '\0') &&
			(buf[j] != '\n'); j++) {
			local_buf[j-i] = toupper(buf[j]);
		}
		local_buf[j-i] = '\0';
		if (((j - i) == 0) || ((j - i) > 8)) {
			rc = -ENAMETOOLONG;
			goto seg_list_del;
		}

		rc = dcssblk_load_segment(local_buf, &seg_info);
		if (rc < 0)
			goto seg_list_del;
		/*
		 * get a struct dcssblk_dev_info
		 */
		if (num_of_segments == 0) {
			dev_info = kzalloc(sizeof(struct dcssblk_dev_info),
					GFP_KERNEL);
			if (dev_info == NULL) {
				/*
				 * The segment just loaded is not on any list
				 * yet, so no later cleanup would find it:
				 * unload and free it here.
				 */
				segment_unload(seg_info->segment_name);
				kfree(seg_info);
				rc = -ENOMEM;
				goto out;
			}
			strcpy(dev_info->segment_name, local_buf);
			dev_info->segment_type = seg_info->segment_type;
			INIT_LIST_HEAD(&dev_info->seg_list);
		}
		list_add_tail(&seg_info->lh, &dev_info->seg_list);
		num_of_segments++;
		i = j;

		if ((buf[j] == '\0') || (buf[j] == '\n'))
			break;
	}

	/* no trailing colon at the end of the input */
	if ((i > 0) && (buf[i-1] == ':')) {
		rc = -ENAMETOOLONG;
		goto seg_list_del;
	}
	/* from here on local_buf holds the raw input list (used for messages) */
	strscpy(local_buf, buf, i + 1);
	dev_info->num_of_segments = num_of_segments;
	rc = dcssblk_is_continuous(dev_info);
	if (rc < 0)
		goto seg_list_del;

	dev_info->start = dcssblk_find_lowest_addr(dev_info);
	dev_info->end = dcssblk_find_highest_addr(dev_info);

	dev_set_name(&dev_info->dev, "%s", dev_info->segment_name);
	dev_info->dev.release = dcssblk_release_segment;
	dev_info->dev.groups = dcssblk_dev_attr_groups;
	INIT_LIST_HEAD(&dev_info->lh);
	dev_info->gd = blk_alloc_disk(NUMA_NO_NODE);
	if (dev_info->gd == NULL) {
		rc = -ENOMEM;
		goto seg_list_del;
	}
	dev_info->gd->major = dcssblk_major;
	dev_info->gd->minors = DCSSBLK_MINORS_PER_DISK;
	dev_info->gd->fops = &dcssblk_devops;
	dev_info->gd->private_data = dev_info;
	dev_info->gd->flags |= GENHD_FL_NO_PART;
	blk_queue_logical_block_size(dev_info->gd->queue, 4096);
	blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);

	seg_byte_size = (dev_info->end - dev_info->start + 1);
	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
	pr_info("Loaded %s with total size %lu bytes and capacity %lu "
		"sectors\n", local_buf, seg_byte_size, seg_byte_size >> 9);

	dev_info->save_pending = 0;
	dev_info->is_shared = 1;
	dev_info->dev.parent = dcssblk_root_dev;

	/*
	 *get minor, add to list
	 */
	down_write(&dcssblk_devices_sem);
	/*
	 * NOTE(review): local_buf holds the raw, not upper-cased input list
	 * here, so this lookup only matches a single, already upper-case
	 * segment name -- confirm whether that is intended.
	 */
	if (dcssblk_get_segment_by_name(local_buf)) {
		rc = -EEXIST;
		goto release_gd;
	}
	rc = dcssblk_assign_free_minor(dev_info);
	if (rc)
		goto release_gd;
	sprintf(dev_info->gd->disk_name, "dcssblk%d",
		dev_info->gd->first_minor);
	list_add_tail(&dev_info->lh, &dcssblk_devices);

	/* reference dropped again in dcssblk_release_segment() */
	if (!try_module_get(THIS_MODULE)) {
		rc = -ENODEV;
		goto dev_list_del;
	}
	/*
	 * register the device
	 */
	rc = device_register(&dev_info->dev);
	if (rc)
		goto put_dev;

	dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
	if (IS_ERR(dev_info->dax_dev)) {
		rc = PTR_ERR(dev_info->dax_dev);
		dev_info->dax_dev = NULL;
		goto put_dev;
	}
	set_dax_synchronous(dev_info->dax_dev);
	rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
	if (rc)
		goto out_dax;

	/* extra reference, paired with the put_device() after device_unregister() on removal */
	get_device(&dev_info->dev);
	rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
	if (rc)
		goto out_dax_host;

	switch (dev_info->segment_type) {
		case SEG_TYPE_SR:
		case SEG_TYPE_ER:
		case SEG_TYPE_SC:
			set_disk_ro(dev_info->gd,1);
			break;
		default:
			set_disk_ro(dev_info->gd,0);
			break;
	}
	up_write(&dcssblk_devices_sem);
	rc = count;
	goto out;

out_dax_host:
	/* undo the get_device() taken right before device_add_disk() */
	put_device(&dev_info->dev);
	dax_remove_host(dev_info->gd);
out_dax:
	kill_dax(dev_info->dax_dev);
	put_dax(dev_info->dax_dev);
put_dev:
	/*
	 * NOTE(review): this label is also reached after device_register()
	 * succeeded, yet only put_device() is called and the device is never
	 * deleted again -- left unchanged here, needs a separate fix.
	 */
	list_del(&dev_info->lh);
	put_disk(dev_info->gd);
	list_for_each_entry(seg_info, &dev_info->seg_list, lh) {
		segment_unload(seg_info->segment_name);
	}
	/* the release callback frees the segment list and dev_info */
	put_device(&dev_info->dev);
	up_write(&dcssblk_devices_sem);
	goto out;
dev_list_del:
	list_del(&dev_info->lh);
release_gd:
	put_disk(dev_info->gd);
	up_write(&dcssblk_devices_sem);
seg_list_del:
	if (dev_info == NULL)
		goto out;
	list_for_each_entry_safe(seg_info, temp, &dev_info->seg_list, lh) {
		list_del(&seg_info->lh);
		segment_unload(seg_info->segment_name);
		kfree(seg_info);
	}
	kfree(dev_info);
out:
	kfree(local_buf);
out_nobuf:
	return rc;
}
/*
 * device attribute for removing devices
 *
 * Input is the name of a device (1 to 8 characters, converted to upper
 * case), terminated by NUL, newline or the end of the buffer. The device
 * is taken off dcssblk_devices, its segments are unloaded and its DAX
 * device, gendisk and struct device are torn down.
 *
 * Returns count on success, -EINVAL if called on a device other than the
 * root device, -ENOMEM, -ENAMETOOLONG for an empty or too long name,
 * -ENODEV if no such device exists, -EBUSY while the device is open.
 */
static ssize_t
dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
	struct dcssblk_dev_info *dev_info;
	struct segment_info *entry;
	int rc, i;
	char *local_buf;

	if (dev != dcssblk_root_dev) {
		return -EINVAL;
	}
	local_buf = kmalloc(count + 1, GFP_KERNEL);
	if (local_buf == NULL) {
		return -ENOMEM;
	}
	/*
	 * parse input
	 */
	for (i = 0; (i < count && (*(buf+i)!='\0') && (*(buf+i)!='\n')); i++) {
		local_buf[i] = toupper(buf[i]);
	}
	local_buf[i] = '\0';
	if ((i == 0) || (i > 8)) {
		rc = -ENAMETOOLONG;
		goto out_buf;
	}

	down_write(&dcssblk_devices_sem);
	dev_info = dcssblk_get_device_by_name(local_buf);
	if (dev_info == NULL) {
		up_write(&dcssblk_devices_sem);
		pr_warn("Device %s cannot be removed because it is not a known device\n",
			local_buf);
		rc = -ENODEV;
		goto out_buf;
	}
	if (atomic_read(&dev_info->use_count) != 0) {
		up_write(&dcssblk_devices_sem);
		pr_warn("Device %s cannot be removed while it is in use\n",
			local_buf);
		rc = -EBUSY;
		goto out_buf;
	}

	list_del(&dev_info->lh);
	/* unload all related segments */
	list_for_each_entry(entry, &dev_info->seg_list, lh)
		segment_unload(entry->segment_name);
	/*
	 * Drop the semaphore before del_gendisk(): dcssblk_release() takes
	 * dcssblk_devices_sem too, and holding it across del_gendisk()
	 * triggered a lockdep warning.
	 */
	up_write(&dcssblk_devices_sem);

	dax_remove_host(dev_info->gd);
	kill_dax(dev_info->dax_dev);
	put_dax(dev_info->dax_dev);
	del_gendisk(dev_info->gd);
	put_disk(dev_info->gd);

	device_unregister(&dev_info->dev);
	/* drops the extra reference taken with get_device() in dcssblk_add_store() */
	put_device(&dev_info->dev);

	rc = count;
out_buf:
	kfree(local_buf);
	return rc;
}
/*
 * Block device open callback.
 *
 * Bumps the use count of the dcssblk device attached to @disk.
 * @mode is not looked at.
 *
 * Returns 0 on success, or -ENODEV when @disk has no private data.
 */
static int
dcssblk_open(struct gendisk *disk, blk_mode_t mode)
{
	struct dcssblk_dev_info *info = disk->private_data;

	if (!info)
		return -ENODEV;

	atomic_inc(&info->use_count);
	return 0;
}
/*
 * Block device release callback.
 *
 * Drops one use count on the dcssblk device attached to @disk. When the
 * count reaches zero and a save is pending, every segment of the device
 * is saved, except segments of type EN or SN, for which a warning is
 * printed instead. The pending flag is cleared afterwards.
 *
 * All of this runs with dcssblk_devices_sem held for writing.
 */
static void
dcssblk_release(struct gendisk *disk)
{
	struct dcssblk_dev_info *info = disk->private_data;
	struct segment_info *seg;

	if (WARN_ON(!info))
		return;

	down_write(&dcssblk_devices_sem);
	/* Nothing to do unless this was the last user and a save is pending. */
	if (!atomic_dec_and_test(&info->use_count) || !info->save_pending)
		goto out_unlock;

	pr_info("Device %s has become idle and is being saved now\n",
		info->segment_name);
	list_for_each_entry(seg, &info->seg_list, lh) {
		switch (seg->segment_type) {
		case SEG_TYPE_EN:
		case SEG_TYPE_SN:
			pr_warn("DCSS %s is of type SN or EN and cannot be saved\n",
				seg->segment_name);
			break;
		default:
			segment_save(seg->segment_name);
			break;
		}
	}
	info->save_pending = 0;

out_unlock:
	up_write(&dcssblk_devices_sem);
}
static void
dcssblk_submit_bio(struct bio *bio)
{
struct dcssblk_dev_info *dev_info;
block: Convert bio_for_each_segment() to bvec_iter More prep work for immutable biovecs - with immutable bvecs drivers won't be able to use the biovec directly, they'll need to use helpers that take into account bio->bi_iter.bi_bvec_done. This updates callers for the new usage without changing the implementation yet. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Paul Clements <Paul.Clements@steeleye.com> Cc: Jim Paris <jim@jtan.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com> Cc: Sreekanth Reddy <Sreekanth.Reddy@lsi.com> Cc: support@lsi.com Cc: "James E.J. 
Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Matthew Wilcox <matthew.r.wilcox@intel.com> Cc: Keith Busch <keith.busch@intel.com> Cc: Stephen Hemminger <shemminger@vyatta.com> Cc: Quoc-Son Anh <quoc-sonx.anh@intel.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Jan Kara <jack@suse.cz> Cc: linux-m68k@lists.linux-m68k.org Cc: linuxppc-dev@lists.ozlabs.org Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net Cc: cbe-oss-dev@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: linux-raid@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: DL-MPTFusionLinux@lsi.com Cc: linux-scsi@vger.kernel.org Cc: devel@driverdev.osuosl.org Cc: linux-fsdevel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: linux-mm@kvack.org Acked-by: Geoff Levand <geoff@infradead.org>
2013-11-24 09:19:00 +08:00
struct bio_vec bvec;
struct bvec_iter iter;
unsigned long index;
void *page_addr;
unsigned long source_addr;
unsigned long bytes_done;
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
if (dev_info == NULL)
goto fail;
if (!IS_ALIGNED(bio->bi_iter.bi_sector, 8) ||
!IS_ALIGNED(bio->bi_iter.bi_size, PAGE_SIZE))
/* Request is not page-aligned. */
goto fail;
/* verify data transfer direction */
if (dev_info->is_shared) {
switch (dev_info->segment_type) {
case SEG_TYPE_SR:
case SEG_TYPE_ER:
case SEG_TYPE_SC:
/* cannot write to these segments */
if (bio_data_dir(bio) == WRITE) {
pr_warn("Writing to %s failed because it is a read-only device\n",
dev_name(&dev_info->dev));
goto fail;
}
}
}
block: Abstract out bvec iterator Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Boaz Harrosh <bharrosh@panasas.com> Cc: Benny Halevy <bhalevy@tonian.com> Cc: "James E.J. Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Nicholas A. 
Bellinger" <nab@linux-iscsi.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Chris Mason <chris.mason@fusionio.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@kernel.org> Cc: Joern Engel <joern@logfs.org> Cc: Prasad Joshi <prasadjoshi.linux@gmail.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Ben Myers <bpm@sgi.com> Cc: xfs@oss.sgi.com Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Cc: "Roger Pau Monné" <roger.pau@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Ian Campbell <Ian.Campbell@citrix.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Jerome Marchand <jmarchand@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Peng Tao <tao.peng@emc.com> Cc: Andy Adamson <andros@netapp.com> Cc: fanchaoting <fanchaoting@cn.fujitsu.com> Cc: Jie Liu <jeff.liu@oracle.com> Cc: Sunil Mushran <sunil.mushran@gmail.com> Cc: "Martin K. 
Petersen" <martin.petersen@oracle.com> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Pankaj Kumar <pankaj.km@samsung.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Mel Gorman <mgorman@suse.de>6
2013-10-12 06:44:27 +08:00
index = (bio->bi_iter.bi_sector >> 3);
block: Convert bio_for_each_segment() to bvec_iter More prep work for immutable biovecs - with immutable bvecs drivers won't be able to use the biovec directly, they'll need to use helpers that take into account bio->bi_iter.bi_bvec_done. This updates callers for the new usage without changing the implementation yet. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Paul Clements <Paul.Clements@steeleye.com> Cc: Jim Paris <jim@jtan.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com> Cc: Sreekanth Reddy <Sreekanth.Reddy@lsi.com> Cc: support@lsi.com Cc: "James E.J. 
Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Matthew Wilcox <matthew.r.wilcox@intel.com> Cc: Keith Busch <keith.busch@intel.com> Cc: Stephen Hemminger <shemminger@vyatta.com> Cc: Quoc-Son Anh <quoc-sonx.anh@intel.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Jan Kara <jack@suse.cz> Cc: linux-m68k@lists.linux-m68k.org Cc: linuxppc-dev@lists.ozlabs.org Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net Cc: cbe-oss-dev@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: linux-raid@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: DL-MPTFusionLinux@lsi.com Cc: linux-scsi@vger.kernel.org Cc: devel@driverdev.osuosl.org Cc: linux-fsdevel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: linux-mm@kvack.org Acked-by: Geoff Levand <geoff@infradead.org>
2013-11-24 09:19:00 +08:00
bio_for_each_segment(bvec, bio, iter) {
page_addr = bvec_virt(&bvec);
source_addr = dev_info->start + (index<<12) + bytes_done;
if (unlikely(!IS_ALIGNED((unsigned long)page_addr, PAGE_SIZE) ||
!IS_ALIGNED(bvec.bv_len, PAGE_SIZE)))
// More paranoia.
goto fail;
if (bio_data_dir(bio) == READ)
memcpy(page_addr, __va(source_addr), bvec.bv_len);
else
memcpy(__va(source_addr), page_addr, bvec.bv_len);
block: Convert bio_for_each_segment() to bvec_iter More prep work for immutable biovecs - with immutable bvecs drivers won't be able to use the biovec directly, they'll need to use helpers that take into account bio->bi_iter.bi_bvec_done. This updates callers for the new usage without changing the implementation yet. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Paul Clements <Paul.Clements@steeleye.com> Cc: Jim Paris <jim@jtan.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com> Cc: Sreekanth Reddy <Sreekanth.Reddy@lsi.com> Cc: support@lsi.com Cc: "James E.J. 
Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Matthew Wilcox <matthew.r.wilcox@intel.com> Cc: Keith Busch <keith.busch@intel.com> Cc: Stephen Hemminger <shemminger@vyatta.com> Cc: Quoc-Son Anh <quoc-sonx.anh@intel.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Jan Kara <jack@suse.cz> Cc: linux-m68k@lists.linux-m68k.org Cc: linuxppc-dev@lists.ozlabs.org Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net Cc: cbe-oss-dev@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: linux-raid@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: DL-MPTFusionLinux@lsi.com Cc: linux-scsi@vger.kernel.org Cc: devel@driverdev.osuosl.org Cc: linux-fsdevel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: linux-mm@kvack.org Acked-by: Geoff Levand <geoff@infradead.org>
2013-11-24 09:19:00 +08:00
bytes_done += bvec.bv_len;
}
bio_endio(bio);
return;
fail:
bio_io_error(bio);
}
/*
 * Translate a page offset within a dcssblk device into a kernel address
 * and/or a pfn.
 *
 * @dev_info: device whose start/end fields bound the range
 * @pgoff:    page offset from the start of the device
 * @nr_pages: not used by this helper
 * @kaddr:    if non-NULL, receives the kernel virtual address of the page
 * @pfn:      if non-NULL, receives the pfn, flagged PFN_DEV|PFN_SPECIAL
 *
 * Returns the number of pages from @pgoff up to the end of the device.
 */
static long
__dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	resource_size_t offset = pgoff * PAGE_SIZE;
	unsigned long dev_sz = dev_info->end - dev_info->start + 1;

	/*
	 * dev_info->start is handled as a physical address: the pfn below
	 * is derived from it with PFN_DOWN(), and dcssblk_submit_bio()
	 * passes it through __va() before touching the memory. Do the same
	 * conversion here instead of casting the number to a pointer.
	 */
	if (kaddr)
		*kaddr = __va(dev_info->start + offset);
	if (pfn)
		*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
				      PFN_DEV|PFN_SPECIAL);

	return (dev_sz - offset) / PAGE_SIZE;
}
/*
 * DAX direct_access callback: look up the dcssblk device stored as the
 * private data of @dax_dev and forward to __dcssblk_direct_access().
 * @mode is not used.
 */
static long
dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
		long nr_pages, enum dax_access_mode mode, void **kaddr,
		pfn_t *pfn)
{
	return __dcssblk_direct_access(dax_get_private(dax_dev), pgoff,
				       nr_pages, kaddr, pfn);
}
/*
 * Walk the dcssblk_segments string and add a device for every
 * comma-separated set found in it.
 *
 * Each set is the text up to the next ',', '(' or end of string and is
 * passed to dcssblk_add_store(). If adding succeeded and the set is
 * followed by "(local)", the device is looked up by the upper-cased name
 * in front of the first ':' and "0\n" is written to it through
 * dcssblk_shared_store().
 */
static void
dcssblk_check_params(void)
{
	struct dcssblk_dev_info *dev_info;
	char buf[DCSSBLK_PARM_LEN + 1];
	int start, end, rc;
	char *p;

	start = 0;
	while (start < DCSSBLK_PARM_LEN && dcssblk_segments[start] != '\0') {
		/* Copy one set into buf. */
		for (end = start; end < DCSSBLK_PARM_LEN; end++) {
			char c = dcssblk_segments[end];

			if (c == ',' || c == '\0' || c == '(')
				break;
			buf[end - start] = c;
		}
		buf[end - start] = '\0';

		rc = dcssblk_add_store(dcssblk_root_dev, NULL, buf, end - start);
		if (rc >= 0 && dcssblk_segments[end] == '(') {
			/* Cut buf down to the upper-cased name before ':'. */
			for (p = buf; *p != ':' && *p != '\0'; p++)
				*p = toupper(*p);
			*p = '\0';

			if (strncmp(&dcssblk_segments[end], "(local)", 7) == 0) {
				down_read(&dcssblk_devices_sem);
				dev_info = dcssblk_get_device_by_name(buf);
				up_read(&dcssblk_devices_sem);
				if (dev_info)
					dcssblk_shared_store(&dev_info->dev,
							     NULL, "0\n", 2);
			}
		}

		/* Skip whatever is left of this set, e.g. a "(...)" suffix. */
		while (dcssblk_segments[end] != ',' &&
		       dcssblk_segments[end] != '\0')
			end++;
		if (dcssblk_segments[end] == '\0')
			break;

		/* Continue right after the comma. */
		start = end + 1;
	}
}
/*
* The init/exit functions.
*/
/*
 * Module exit: unregister the dcssblk root device, then release the
 * block major number kept in dcssblk_major.
 */
static void __exit
dcssblk_exit(void)
{
root_device_unregister(dcssblk_root_dev);
unregister_blkdev(dcssblk_major, DCSSBLK_NAME);
}
/*
 * Module init: set up the device semaphore and block major, register the
 * dcssblk root device with its "add" and "remove" attributes, and finally
 * process the segments given as module parameter.
 *
 * The semaphore is initialised and the major number is stored before the
 * sysfs attributes are created, so that neither is still unset when the
 * attributes become reachable.
 *
 * Returns 0 on success or a negative error code; on failure everything
 * registered so far is unregistered again.
 */
static int __init
dcssblk_init(void)
{
	int rc;

	init_rwsem(&dcssblk_devices_sem);

	rc = register_blkdev(0, DCSSBLK_NAME);
	if (rc < 0)
		return rc;
	dcssblk_major = rc;

	dcssblk_root_dev = root_device_register("dcssblk");
	if (IS_ERR(dcssblk_root_dev)) {
		rc = PTR_ERR(dcssblk_root_dev);
		goto out_blkdev;
	}
	rc = device_create_file(dcssblk_root_dev, &dev_attr_add);
	if (rc)
		goto out_root;
	rc = device_create_file(dcssblk_root_dev, &dev_attr_remove);
	if (rc)
		goto out_root;

	dcssblk_check_params();
	return 0;

out_root:
	root_device_unregister(dcssblk_root_dev);
out_blkdev:
	unregister_blkdev(dcssblk_major, DCSSBLK_NAME);
	return rc;
}
/* Module entry and exit points. */
module_init(dcssblk_init);
module_exit(dcssblk_exit);
/*
 * "segments" module parameter: stored in dcssblk_segments (at most
 * DCSSBLK_PARM_LEN bytes), permissions 0444. The string is parsed by
 * dcssblk_check_params().
 */
module_param_string(segments, dcssblk_segments, DCSSBLK_PARM_LEN, 0444);
MODULE_PARM_DESC(segments, "Name of DCSS segment(s) to be loaded, "
"comma-separated list, names in each set separated "
"by commas are separated by colons, each set contains "
"names of contiguous segments and each name max. 8 chars.\n"
"Adding \"(local)\" to the end of each set equals echoing 0 "
"to /sys/devices/dcssblk/<device name>/shared after loading "
"the contiguous segments - \n"
"e.g. segments=\"mydcss1,mydcss2:mydcss3,mydcss4(local)\"");
MODULE_LICENSE("GPL");