commit f7e6816994

Merge tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - initially based on Jens' 'for-4.8/core' (given all the flag churn) and
   later merged with 'for-4.8/core' to pick up the QUEUE_FLAG_DAX commits
   that DM depends on to provide its DAX support

 - clean up the bio-based vs request-based DM core code by moving the
   request-based DM core code out to dm-rq.[hc]

 - reinstate bio-based support in the DM multipath target (done with the
   idea that fast storage like NVMe over Fabrics could benefit) -- while
   preserving support for request_fn and blk-mq request-based DM mpath

 - SCSI and DM multipath persistent reservation fixes that were
   coordinated with Martin Petersen

 - the DM raid target saw the most extensive change this cycle; it now
   provides reshape and takeover support (by layering on top of the
   corresponding MD capabilities)

 - DAX support for DM core and the linear, stripe and error targets

 - a DM thin-provisioning block discard vs allocation race fix that
   addresses the potential for corruption

 - a stable fix for DM verity-fec's block calculation during decode

 - a few cleanups and fixes to DM core and various targets

* tag 'dm-4.8-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (73 commits)
  dm: allow bio-based table to be upgraded to bio-based with DAX support
  dm snap: add fake origin_direct_access
  dm stripe: add DAX support
  dm error: add DAX support
  dm linear: add DAX support
  dm: add infrastructure for DAX support
  dm thin: fix a race condition between discarding and provisioning a block
  dm btree: fix a bug in dm_btree_find_next_single()
  dm raid: fix random optimal_io_size for raid0
  dm raid: address checkpatch.pl complaints
  dm: call PR reserve/unreserve on each underlying device
  sd: don't use the ALL_TG_PT bit for reservations
  dm: fix second blk_delay_queue() parameter to be in msec units not jiffies
  dm raid: change logical functions to actually return bool
  dm raid: use rdev_for_each in status
  dm raid: use rs->raid_disks to avoid memory leaks on free
  dm raid: support delta_disks for raid1, fix table output
  dm raid: enhance reshape check and factor out reshape setup
  dm raid: allow resize during recovery
  dm raid: fix rs_is_recovering() to allow for lvextend
  ...
@@ -14,8 +14,12 @@ The target is named "raid" and it accepts the following parameters:
   <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]

 <raid_type>:
   raid0		RAID0 striping (no resilience)
   raid1		RAID1 mirroring
   raid4		RAID4 dedicated parity disk
   raid4		RAID4 with dedicated last parity disk
   raid5_n	RAID5 with dedicated last parity disk supporting takeover
		Same as raid4
		-Transitory layout
   raid5_la	RAID5 left asymmetric
		- rotating parity 0 with data continuation
   raid5_ra	RAID5 right asymmetric
@@ -30,7 +34,19 @@ The target is named "raid" and it accepts the following parameters:
		- rotating parity N (right-to-left) with data restart
   raid6_nc	RAID6 N continue
		- rotating parity N (right-to-left) with data continuation
   raid6_n_6	RAID6 with dedicated parity disks
		- parity and Q-syndrome on the last 2 disks;
		  layout for takeover from/to raid4/raid5_n
   raid6_la_6	Same as "raid5_la" plus dedicated last Q-syndrome disk
		- layout for takeover from raid5_la from/to raid6
   raid6_ra_6	Same as "raid5_ra" plus dedicated last Q-syndrome disk
		- layout for takeover from raid5_ra from/to raid6
   raid6_ls_6	Same as "raid5_ls" plus dedicated last Q-syndrome disk
		- layout for takeover from raid5_ls from/to raid6
   raid6_rs_6	Same as "raid5_rs" plus dedicated last Q-syndrome disk
		- layout for takeover from raid5_rs from/to raid6
   raid10	Various RAID10 inspired algorithms chosen by additional params
		(see raid10_format and raid10_copies below)
		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
		- RAID1E: Integrated Adjacent Stripe Mirroring
		- RAID1E: Integrated Offset Stripe Mirroring
@@ -116,10 +132,41 @@ The target is named "raid" and it accepts the following parameters:
		Here we see layouts closely akin to 'RAID1E - Integrated
		Offset Stripe Mirroring'.

	[delta_disks <N>]
		The delta_disks option value (-251 < N < +251) triggers
		device removal (negative value) or device addition (positive
		value) to any reshape supporting raid levels 4/5/6 and 10.
		RAID levels 4/5/6 allow for addition of devices (metadata
		and data device tuple), raid10_near and raid10_offset only
		allow for device addition. raid10_far does not support any
		reshaping at all.
		A minimum number of devices has to be kept to enforce resilience,
		which is 3 devices for raid4/5 and 4 devices for raid6.

	[data_offset <sectors>]
		This option value defines the offset into each data device
		where the data starts. This is used to provide out-of-place
		reshaping space to avoid writing over data whilst
		changing the layout of stripes, hence an interruption/crash
		may happen at any time without the risk of losing data.
		E.g. when adding devices to an existing raid set during
		forward reshaping, the out-of-place space will be allocated
		at the beginning of each raid device. The kernel raid4/5/6/10
		MD personalities supporting such device addition will read the data from
		the existing first stripes (those with smaller number of stripes)
		starting at data_offset to fill up a new stripe with the larger
		number of stripes, calculate the redundancy blocks (CRC/Q-syndrome)
		and write that new stripe to offset 0. The same will be applied to all
		N-1 other new stripes. This out-of-place scheme is used to change
		the RAID type (i.e. the allocation algorithm) as well, e.g.
		changing from raid5_ls to raid5_n. (A small sketch of this
		out-of-place placement follows this excerpt.)

 <#raid_devs>: The number of devices composing the array.
	Each device consists of two entries. The first is the device
	containing the metadata (if any); the second is the one containing the
	data.
	data. A maximum of 64 metadata/data device entries are supported
	up to target version 1.8.0.
	1.9.0 supports up to 253 which is enforced by the used MD kernel runtime.

	If a drive has failed or is missing at creation time, a '-' can be
	given for both the metadata and data drives for a given position.

@@ -207,7 +254,6 @@ include:
	"recover"- Initiate/continue a recover process.
	"check"  - Initiate a check (i.e. a "scrub") of the array.
	"repair" - Initiate a repair of the array.
	"reshape"- Currently unsupported (-EINVAL).


Discard Support
@@ -257,3 +303,9 @@ Version History
 1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
 1.6.0   Add discard support (and devices_handle_discard_safely module param).
 1.7.0   Add support for MD RAID0 mappings.
 1.8.0   Explicitly check for compatible flags in the superblock metadata
	 and reject to start the raid set if any are set by a newer
	 target version, thus avoiding data corruption on a raid set
	 with a reshape in progress.
 1.9.0   Add support for RAID level takeover/reshape/region size
	 and set size reduction.
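The out-of-place placement described under [data_offset] above can be pictured with a small userspace sketch. This is purely illustrative and not the MD reshape code: it uses a RAID0-style chunk placement (no rotating parity), made-up constants, and simply prints where each logical chunk sits in the old geometry (data starting at data_offset) versus the new geometry (data starting at sector 0), which is the headroom a forward reshape writes the rebuilt, wider stripes into.

/*
 * Illustrative userspace sketch only -- not the MD implementation.
 */
#include <stdio.h>

#define CHUNK_SECTORS	8	/* sectors per chunk, arbitrary for the demo */
#define OLD_DISKS	3	/* data disks before adding a device */
#define NEW_DISKS	4	/* data disks after adding a device */
#define DATA_OFFSET	1024	/* reserved sectors at the head of each device */

struct location { int disk; long sector; };

/* RAID0-style placement used for both layouts; only the start offset differs. */
static struct location locate(long chunk, int disks, long start_sector)
{
	struct location loc = {
		.disk   = (int)(chunk % disks),
		.sector = start_sector + (chunk / disks) * CHUNK_SECTORS,
	};
	return loc;
}

int main(void)
{
	for (long chunk = 0; chunk < 12; chunk++) {
		struct location old = locate(chunk, OLD_DISKS, DATA_OFFSET);
		struct location new = locate(chunk, NEW_DISKS, 0);

		printf("chunk %2ld: old disk %d sector %5ld -> new disk %d sector %5ld\n",
		       chunk, old.disk, old.sector, new.disk, new.sector);
	}
	return 0;
}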
@@ -3,7 +3,8 @@
 #

 dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o
		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
		   dm-rq.o
 dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
		   dm-snap-persistent.o
@@ -1,4 +1,4 @@
 #include "dm.h"
 #include "dm-core.h"

 /*
  * The kobject release method must not be placed in the module itself,
drivers/md/dm-core.h (new file, 149 lines)
@@ -0,0 +1,149 @@
/*
|
||||
* Internal header file _only_ for device mapper core
|
||||
*
|
||||
* Copyright (C) 2016 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the LGPL.
|
||||
*/
|
||||
|
||||
#ifndef DM_CORE_INTERNAL_H
|
||||
#define DM_CORE_INTERNAL_H
|
||||
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "dm.h"
|
||||
|
||||
#define DM_RESERVED_MAX_IOS 1024
|
||||
|
||||
struct dm_kobject_holder {
|
||||
struct kobject kobj;
|
||||
struct completion completion;
|
||||
};
|
||||
|
||||
/*
|
||||
 * DM core internal structure that is used directly by dm.c and dm-rq.c.
 * DM targets must _not_ dereference a mapped_device to directly access its members!
|
||||
*/
|
||||
struct mapped_device {
|
||||
struct srcu_struct io_barrier;
|
||||
struct mutex suspend_lock;
|
||||
|
||||
/*
|
||||
* The current mapping (struct dm_table *).
|
||||
* Use dm_get_live_table{_fast} or take suspend_lock for
|
||||
* dereference.
|
||||
*/
|
||||
void __rcu *map;
|
||||
|
||||
struct list_head table_devices;
|
||||
struct mutex table_devices_lock;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
struct request_queue *queue;
|
||||
int numa_node_id;
|
||||
|
||||
unsigned type;
|
||||
/* Protect queue and type against concurrent access. */
|
||||
struct mutex type_lock;
|
||||
|
||||
atomic_t holders;
|
||||
atomic_t open_count;
|
||||
|
||||
struct dm_target *immutable_target;
|
||||
struct target_type *immutable_target_type;
|
||||
|
||||
struct gendisk *disk;
|
||||
char name[16];
|
||||
|
||||
void *interface_ptr;
|
||||
|
||||
/*
|
||||
* A list of ios that arrived while we were suspended.
|
||||
*/
|
||||
atomic_t pending[2];
|
||||
wait_queue_head_t wait;
|
||||
struct work_struct work;
|
||||
spinlock_t deferred_lock;
|
||||
struct bio_list deferred;
|
||||
|
||||
/*
|
||||
* Event handling.
|
||||
*/
|
||||
wait_queue_head_t eventq;
|
||||
atomic_t event_nr;
|
||||
atomic_t uevent_seq;
|
||||
struct list_head uevent_list;
|
||||
spinlock_t uevent_lock; /* Protect access to uevent_list */
|
||||
|
||||
/* the number of internal suspends */
|
||||
unsigned internal_suspend_count;
|
||||
|
||||
/*
|
||||
* Processing queue (flush)
|
||||
*/
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
/*
|
||||
* io objects are allocated from here.
|
||||
*/
|
||||
mempool_t *io_pool;
|
||||
mempool_t *rq_pool;
|
||||
|
||||
struct bio_set *bs;
|
||||
|
||||
/*
|
||||
 * freeze/thaw support requires holding onto a super block
|
||||
*/
|
||||
struct super_block *frozen_sb;
|
||||
|
||||
/* forced geometry settings */
|
||||
struct hd_geometry geometry;
|
||||
|
||||
struct block_device *bdev;
|
||||
|
||||
/* kobject and completion */
|
||||
struct dm_kobject_holder kobj_holder;
|
||||
|
||||
/* zero-length flush that will be cloned and submitted to targets */
|
||||
struct bio flush_bio;
|
||||
|
||||
struct dm_stats stats;
|
||||
|
||||
struct kthread_worker kworker;
|
||||
struct task_struct *kworker_task;
|
||||
|
||||
/* for request-based merge heuristic in dm_request_fn() */
|
||||
unsigned seq_rq_merge_deadline_usecs;
|
||||
int last_rq_rw;
|
||||
sector_t last_rq_pos;
|
||||
ktime_t last_rq_start_time;
|
||||
|
||||
/* for blk-mq request-based DM support */
|
||||
struct blk_mq_tag_set *tag_set;
|
||||
bool use_blk_mq:1;
|
||||
bool init_tio_pdu:1;
|
||||
};
|
||||
|
||||
void dm_init_md_queue(struct mapped_device *md);
|
||||
void dm_init_normal_md_queue(struct mapped_device *md);
|
||||
int md_in_flight(struct mapped_device *md);
|
||||
void disable_write_same(struct mapped_device *md);
|
||||
|
||||
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
|
||||
{
|
||||
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
|
||||
}
|
||||
|
||||
unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max);
|
||||
|
||||
static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
|
||||
{
|
||||
return !maxlen || strlen(result) + 1 >= maxlen;
|
||||
}
|
||||
|
||||
#endif
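dm_get_completion_from_kobject() works because struct dm_kobject_holder embeds the kobject next to its completion, so the completion can be recovered from a bare kobject pointer via container_of(). A minimal standalone sketch of that pattern, with userspace stand-ins for the kernel types:

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel types used by struct dm_kobject_holder. */
struct kobject    { const char *name; };
struct completion { int done; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct dm_kobject_holder {
	struct kobject kobj;
	struct completion completion;
};

/* Same shape as dm_get_completion_from_kobject() in dm-core.h. */
static struct completion *get_completion_from_kobject(struct kobject *kobj)
{
	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
}

int main(void)
{
	struct dm_kobject_holder holder = { { "md-kobj" }, { 0 } };
	struct completion *c = get_completion_from_kobject(&holder.kobj);

	c->done = 1;	/* proves we got back to the embedded completion */
	printf("recovered completion %p (expected %p), done=%d\n",
	       (void *)c, (void *)&holder.completion, c->done);
	return 0;
}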
|
@ -683,7 +683,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc,
|
||||
u8 *data)
|
||||
{
|
||||
struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
|
||||
u64 sector = cpu_to_le64((u64)dmreq->iv_sector);
|
||||
__le64 sector = cpu_to_le64(dmreq->iv_sector);
|
||||
u8 buf[TCW_WHITENING_SIZE];
|
||||
SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm);
|
||||
int i, r;
|
||||
@ -722,7 +722,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
|
||||
struct dm_crypt_request *dmreq)
|
||||
{
|
||||
struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw;
|
||||
u64 sector = cpu_to_le64((u64)dmreq->iv_sector);
|
||||
__le64 sector = cpu_to_le64(dmreq->iv_sector);
|
||||
u8 *src;
|
||||
int r = 0;
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-core.h"
|
||||
|
||||
#include <linux/device-mapper.h>
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-core.h"
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/vmalloc.h>
|
||||
@ -1267,6 +1267,15 @@ static int populate_table(struct dm_table *table,
|
||||
return dm_table_complete(table);
|
||||
}
|
||||
|
||||
static bool is_valid_type(unsigned cur, unsigned new)
|
||||
{
|
||||
if (cur == new ||
|
||||
(cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
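The new is_valid_type() helper makes bio-based -> DAX-bio-based the only table type change table_load() will accept; every other transition still fails with -EINVAL. A small standalone sketch of that rule (the enum values are stand-ins, not the kernel's DM_TYPE_* numbering):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in values; the kernel's DM_TYPE_* numbering is not reproduced here. */
enum dm_type {
	DM_TYPE_NONE,
	DM_TYPE_BIO_BASED,
	DM_TYPE_REQUEST_BASED,
	DM_TYPE_MQ_REQUEST_BASED,
	DM_TYPE_DAX_BIO_BASED,
};

/* Mirrors the shape of is_valid_type(): same type, or a DAX upgrade. */
static bool is_valid_type(enum dm_type cur, enum dm_type new)
{
	return cur == new ||
	       (cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED);
}

int main(void)
{
	printf("bio     -> dax-bio: %d\n",
	       is_valid_type(DM_TYPE_BIO_BASED, DM_TYPE_DAX_BIO_BASED));  /* 1 */
	printf("dax-bio -> bio    : %d\n",
	       is_valid_type(DM_TYPE_DAX_BIO_BASED, DM_TYPE_BIO_BASED));  /* 0 */
	printf("rq      -> bio    : %d\n",
	       is_valid_type(DM_TYPE_REQUEST_BASED, DM_TYPE_BIO_BASED));  /* 0 */
	return 0;
}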
|
||||
|
||||
static int table_load(struct dm_ioctl *param, size_t param_size)
|
||||
{
|
||||
int r;
|
||||
@ -1309,7 +1318,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
|
||||
DMWARN("unable to set up device queue for new table.");
|
||||
goto err_unlock_md_type;
|
||||
}
|
||||
} else if (dm_get_md_type(md) != dm_table_get_type(t)) {
|
||||
} else if (!is_valid_type(dm_get_md_type(md), dm_table_get_type(t))) {
|
||||
DMWARN("can't change device type after initial table load.");
|
||||
r = -EINVAL;
|
||||
goto err_unlock_md_type;
|
||||
@ -1670,8 +1679,7 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
|
||||
return r;
|
||||
}
|
||||
|
||||
#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */
|
||||
#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */
|
||||
#define DM_PARAMS_MALLOC 0x0001 /* Params allocated with kvmalloc() */
|
||||
#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */
|
||||
|
||||
static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
|
||||
@ -1679,10 +1687,8 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
|
||||
if (param_flags & DM_WIPE_BUFFER)
|
||||
memset(param, 0, param_size);
|
||||
|
||||
if (param_flags & DM_PARAMS_KMALLOC)
|
||||
kfree(param);
|
||||
if (param_flags & DM_PARAMS_VMALLOC)
|
||||
vfree(param);
|
||||
if (param_flags & DM_PARAMS_MALLOC)
|
||||
kvfree(param);
|
||||
}
|
||||
|
||||
static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
|
||||
@ -1714,19 +1720,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
|
||||
* Use kmalloc() rather than vmalloc() when we can.
|
||||
*/
|
||||
dmi = NULL;
|
||||
if (param_kernel->data_size <= KMALLOC_MAX_SIZE) {
|
||||
if (param_kernel->data_size <= KMALLOC_MAX_SIZE)
|
||||
dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
if (dmi)
|
||||
*param_flags |= DM_PARAMS_KMALLOC;
|
||||
}
|
||||
|
||||
if (!dmi) {
|
||||
unsigned noio_flag;
|
||||
noio_flag = memalloc_noio_save();
|
||||
dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (dmi)
|
||||
*param_flags |= DM_PARAMS_VMALLOC;
|
||||
}
|
||||
|
||||
if (!dmi) {
|
||||
@ -1735,6 +1736,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
*param_flags |= DM_PARAMS_MALLOC;
|
||||
|
||||
if (copy_from_user(dmi, user, param_kernel->data_size))
|
||||
goto bad;
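copy_params() keeps preferring kmalloc() and falling back to __vmalloc(), but the buffer is now tagged with a single DM_PARAMS_MALLOC flag and released with kvfree(), which copes with either allocator. A rough userspace analogy of that allocate-small-fast/fall-back-for-large pattern, using malloc() and mmap() as stand-ins; unlike kvfree(), the toy free routine needs the size to pick the right release path, which is exactly the bookkeeping the kernel change removes:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#define SMALL_LIMIT	(128 * 1024)	/* stand-in for KMALLOC_MAX_SIZE */

/* Small buffers come from malloc() ("kmalloc"), large ones from mmap()
 * ("vmalloc").  The real code also falls back when kmalloc() fails under
 * pressure; this toy branches on size only, to keep alloc/free symmetric. */
static void *buf_alloc(size_t size)
{
	if (size <= SMALL_LIMIT)
		return malloc(size);

	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	return p == MAP_FAILED ? NULL : p;
}

/* Unlike kvfree(), this toy needs the size to choose the release path. */
static void buf_free(void *p, size_t size)
{
	if (!p)
		return;
	if (size <= SMALL_LIMIT)
		free(p);
	else
		munmap(p, size);
}

int main(void)
{
	void *small = buf_alloc(4096);
	void *big = buf_alloc(8 * 1024 * 1024);

	if (!small || !big)
		return 1;
	memset(small, 0, 4096);
	memset(big, 0, 8 * 1024 * 1024);
	printf("small=%p big=%p\n", small, big);

	buf_free(small, 4096);
	buf_free(big, 8 * 1024 * 1024);
	return 0;
}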
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <linux/device-mapper.h>
|
||||
#include <linux/dm-kcopyd.h>
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-core.h"
|
||||
|
||||
#define SUB_JOB_SIZE 128
|
||||
#define SPLIT_COUNT 8
|
||||
|
@ -141,9 +141,27 @@ static int linear_iterate_devices(struct dm_target *ti,
|
||||
return fn(ti, lc->dev, lc->start, ti->len, data);
|
||||
}
|
||||
|
||||
static long linear_direct_access(struct dm_target *ti, sector_t sector,
|
||||
void __pmem **kaddr, pfn_t *pfn, long size)
|
||||
{
|
||||
struct linear_c *lc = ti->private;
|
||||
struct block_device *bdev = lc->dev->bdev;
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = linear_map_sector(ti, sector),
|
||||
.size = size,
|
||||
};
|
||||
long ret;
|
||||
|
||||
ret = bdev_direct_access(bdev, &dax);
|
||||
*kaddr = dax.addr;
|
||||
*pfn = dax.pfn;
|
||||
|
||||
return ret;
|
||||
}
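linear_direct_access() mirrors the target's normal map path: translate the target-relative sector with linear_map_sector() and hand the translated access to the underlying device, here via bdev_direct_access(). A standalone sketch of that remap-then-delegate shape; the backing "device", the stub, and the simplified mapping helper are all stand-ins for this example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t sector_t;

/* Stand-in for the underlying device's DAX-capable memory. */
static char backing_memory[1 << 20];	/* 1 MiB = 2048 512-byte sectors */

struct linear_c {
	sector_t start;			/* offset of this target on the device */
};

/* Same idea as linear_map_sector(): device sector = lc->start plus the
 * offset of the requested sector within the target (ti_begin ~ ti->begin). */
static sector_t map_sector(struct linear_c *lc, sector_t ti_begin, sector_t sector)
{
	return lc->start + (sector - ti_begin);
}

/* Stand-in for bdev_direct_access(): hand back a mapping and its length. */
static long direct_access_stub(sector_t sector, void **kaddr, long size)
{
	long avail = (long)sizeof(backing_memory) - (long)(sector * 512);

	if (avail <= 0)
		return -1;
	*kaddr = backing_memory + sector * 512;
	return size < avail ? size : avail;
}

int main(void)
{
	struct linear_c lc = { .start = 16 };	/* target begins 16 sectors in */
	void *kaddr = NULL;
	long ret;

	/* Direct access at target sector 4 ends up at device sector 20. */
	ret = direct_access_stub(map_sector(&lc, 0, 4), &kaddr, 4096);
	if (ret > 0)
		memset(kaddr, 0xa5, (size_t)ret);
	printf("mapped %ld bytes at device sector 20 (%p)\n", ret, kaddr);
	return 0;
}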
|
||||
|
||||
static struct target_type linear_target = {
|
||||
.name = "linear",
|
||||
.version = {1, 2, 1},
|
||||
.version = {1, 3, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = linear_ctr,
|
||||
.dtr = linear_dtr,
|
||||
@ -151,6 +169,7 @@ static struct target_type linear_target = {
|
||||
.status = linear_status,
|
||||
.prepare_ioctl = linear_prepare_ioctl,
|
||||
.iterate_devices = linear_iterate_devices,
|
||||
.direct_access = linear_direct_access,
|
||||
};
|
||||
|
||||
int __init dm_linear_init(void)
|
||||
|
@ -7,7 +7,8 @@
|
||||
|
||||
#include <linux/device-mapper.h>
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-rq.h"
|
||||
#include "dm-bio-record.h"
|
||||
#include "dm-path-selector.h"
|
||||
#include "dm-uevent.h"
|
||||
|
||||
@ -89,6 +90,8 @@ struct multipath {
|
||||
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
|
||||
atomic_t pg_init_count; /* Number of times pg_init called */
|
||||
|
||||
unsigned queue_mode;
|
||||
|
||||
/*
|
||||
* We must use a mempool of dm_mpath_io structs so that we
|
||||
* can resubmit bios on error.
|
||||
@ -97,10 +100,13 @@ struct multipath {
|
||||
|
||||
struct mutex work_mutex;
|
||||
struct work_struct trigger_event;
|
||||
|
||||
struct work_struct process_queued_bios;
|
||||
struct bio_list queued_bios;
|
||||
};
|
||||
|
||||
/*
|
||||
* Context information attached to each bio we process.
|
||||
* Context information attached to each io we process.
|
||||
*/
|
||||
struct dm_mpath_io {
|
||||
struct pgpath *pgpath;
|
||||
@ -114,6 +120,7 @@ static struct kmem_cache *_mpio_cache;
|
||||
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
|
||||
static void trigger_event(struct work_struct *work);
|
||||
static void activate_path(struct work_struct *work);
|
||||
static void process_queued_bios(struct work_struct *work);
|
||||
|
||||
/*-----------------------------------------------
|
||||
* Multipath state flags.
|
||||
@ -185,7 +192,7 @@ static void free_priority_group(struct priority_group *pg,
|
||||
kfree(pg);
|
||||
}
|
||||
|
||||
static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
|
||||
static struct multipath *alloc_multipath(struct dm_target *ti)
|
||||
{
|
||||
struct multipath *m;
|
||||
|
||||
@ -203,15 +210,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
|
||||
mutex_init(&m->work_mutex);
|
||||
|
||||
m->mpio_pool = NULL;
|
||||
if (!use_blk_mq) {
|
||||
unsigned min_ios = dm_get_reserved_rq_based_ios();
|
||||
|
||||
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
|
||||
if (!m->mpio_pool) {
|
||||
kfree(m);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
m->queue_mode = DM_TYPE_NONE;
|
||||
|
||||
m->ti = ti;
|
||||
ti->private = m;
|
||||
@ -220,6 +219,39 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
|
||||
return m;
|
||||
}
|
||||
|
||||
static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
|
||||
{
|
||||
if (m->queue_mode == DM_TYPE_NONE) {
|
||||
/*
|
||||
* Default to request-based.
|
||||
*/
|
||||
if (dm_use_blk_mq(dm_table_get_md(ti->table)))
|
||||
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
|
||||
else
|
||||
m->queue_mode = DM_TYPE_REQUEST_BASED;
|
||||
}
|
||||
|
||||
if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
|
||||
unsigned min_ios = dm_get_reserved_rq_based_ios();
|
||||
|
||||
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
|
||||
if (!m->mpio_pool)
|
||||
return -ENOMEM;
|
||||
}
|
||||
else if (m->queue_mode == DM_TYPE_BIO_BASED) {
|
||||
INIT_WORK(&m->process_queued_bios, process_queued_bios);
|
||||
/*
|
||||
* bio-based doesn't support any direct scsi_dh management;
|
||||
* it just discovers if a scsi_dh is attached.
|
||||
*/
|
||||
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
|
||||
}
|
||||
|
||||
dm_table_set_type(ti->table, m->queue_mode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_multipath(struct multipath *m)
|
||||
{
|
||||
struct priority_group *pg, *tmp;
|
||||
@ -272,6 +304,41 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
|
||||
}
|
||||
}
|
||||
|
||||
static size_t multipath_per_bio_data_size(void)
|
||||
{
|
||||
return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
|
||||
}
|
||||
|
||||
static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
|
||||
{
|
||||
return dm_per_bio_data(bio, multipath_per_bio_data_size());
|
||||
}
|
||||
|
||||
static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
|
||||
{
|
||||
/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
|
||||
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
|
||||
void *bio_details = mpio + 1;
|
||||
|
||||
return bio_details;
|
||||
}
|
||||
|
||||
static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
|
||||
struct dm_bio_details **bio_details_p)
|
||||
{
|
||||
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
|
||||
struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
|
||||
|
||||
memset(mpio, 0, sizeof(*mpio));
|
||||
memset(bio_details, 0, sizeof(*bio_details));
|
||||
dm_bio_record(bio_details, bio);
|
||||
|
||||
if (mpio_p)
|
||||
*mpio_p = mpio;
|
||||
if (bio_details_p)
|
||||
*bio_details_p = bio_details;
|
||||
}
|
||||
|
||||
/*-----------------------------------------------
|
||||
* Path selection
|
||||
*-----------------------------------------------*/
|
||||
@ -431,16 +498,26 @@ failed:
|
||||
* and multipath_resume() calls and we have no need to check
|
||||
* for the DMF_NOFLUSH_SUSPENDING flag.
|
||||
*/
|
||||
static int must_push_back(struct multipath *m)
|
||||
static bool __must_push_back(struct multipath *m)
|
||||
{
|
||||
return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
|
||||
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
|
||||
dm_noflush_suspending(m->ti));
|
||||
}
|
||||
|
||||
static bool must_push_back_rq(struct multipath *m)
|
||||
{
|
||||
return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
|
||||
((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
|
||||
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
|
||||
dm_noflush_suspending(m->ti)));
|
||||
__must_push_back(m));
|
||||
}
|
||||
|
||||
static bool must_push_back_bio(struct multipath *m)
|
||||
{
|
||||
return __must_push_back(m);
|
||||
}
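The refactor splits the old must_push_back() in two: __must_push_back() pushes I/O back while a no-flush suspend is toggling queue_if_no_path, and must_push_back_rq() additionally pushes back whenever queue_if_no_path is currently set (must_push_back_bio() is just the former). A truth-table sketch of the two predicates, with plain bools standing in for the MPATHF_* flag bits:

#include <stdbool.h>
#include <stdio.h>

struct mp_state {
	bool queue_if_no_path;		/* MPATHF_QUEUE_IF_NO_PATH */
	bool saved_queue_if_no_path;	/* MPATHF_SAVED_QUEUE_IF_NO_PATH */
	bool noflush_suspending;	/* dm_noflush_suspending(ti) */
};

/* Mirrors __must_push_back(): the flag is being flipped during a
 * no-flush suspend, so the I/O should be pushed back to dm core. */
static bool mp_must_push_back(const struct mp_state *m)
{
	return m->queue_if_no_path != m->saved_queue_if_no_path &&
	       m->noflush_suspending;
}

/* Mirrors must_push_back_rq(): also push back while queue_if_no_path is set. */
static bool mp_must_push_back_rq(const struct mp_state *m)
{
	return m->queue_if_no_path || mp_must_push_back(m);
}

int main(void)
{
	for (int q = 0; q <= 1; q++)
		for (int s = 0; s <= 1; s++)
			for (int n = 0; n <= 1; n++) {
				struct mp_state m = { q, s, n };

				printf("queue=%d saved=%d noflush=%d -> bio:%d rq:%d\n",
				       q, s, n, mp_must_push_back(&m),
				       mp_must_push_back_rq(&m));
			}
	return 0;
}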
|
||||
|
||||
/*
|
||||
* Map cloned requests
|
||||
* Map cloned requests (request-based multipath)
|
||||
*/
|
||||
static int __multipath_map(struct dm_target *ti, struct request *clone,
|
||||
union map_info *map_context,
|
||||
@ -459,7 +536,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
|
||||
pgpath = choose_pgpath(m, nr_bytes);
|
||||
|
||||
if (!pgpath) {
|
||||
if (!must_push_back(m))
|
||||
if (!must_push_back_rq(m))
|
||||
r = -EIO; /* Failed */
|
||||
return r;
|
||||
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
|
||||
@ -529,6 +606,108 @@ static void multipath_release_clone(struct request *clone)
|
||||
blk_mq_free_request(clone);
|
||||
}
|
||||
|
||||
/*
|
||||
* Map cloned bios (bio-based multipath)
|
||||
*/
|
||||
static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
|
||||
{
|
||||
size_t nr_bytes = bio->bi_iter.bi_size;
|
||||
struct pgpath *pgpath;
|
||||
unsigned long flags;
|
||||
bool queue_io;
|
||||
|
||||
/* Do we need to select a new pgpath? */
|
||||
pgpath = lockless_dereference(m->current_pgpath);
|
||||
queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
|
||||
if (!pgpath || !queue_io)
|
||||
pgpath = choose_pgpath(m, nr_bytes);
|
||||
|
||||
if ((pgpath && queue_io) ||
|
||||
(!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
|
||||
/* Queue for the daemon to resubmit */
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
bio_list_add(&m->queued_bios, bio);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
/* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
|
||||
if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
|
||||
pg_init_all_paths(m);
|
||||
else if (!queue_io)
|
||||
queue_work(kmultipathd, &m->process_queued_bios);
|
||||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
|
||||
if (!pgpath) {
|
||||
if (!must_push_back_bio(m))
|
||||
return -EIO;
|
||||
return DM_MAPIO_REQUEUE;
|
||||
}
|
||||
|
||||
mpio->pgpath = pgpath;
|
||||
mpio->nr_bytes = nr_bytes;
|
||||
|
||||
bio->bi_error = 0;
|
||||
bio->bi_bdev = pgpath->path.dev->bdev;
|
||||
bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
|
||||
|
||||
if (pgpath->pg->ps.type->start_io)
|
||||
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
|
||||
&pgpath->path,
|
||||
nr_bytes);
|
||||
return DM_MAPIO_REMAPPED;
|
||||
}
|
||||
|
||||
static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
|
||||
{
|
||||
struct multipath *m = ti->private;
|
||||
struct dm_mpath_io *mpio = NULL;
|
||||
|
||||
multipath_init_per_bio_data(bio, &mpio, NULL);
|
||||
|
||||
return __multipath_map_bio(m, bio, mpio);
|
||||
}
|
||||
|
||||
static void process_queued_bios_list(struct multipath *m)
|
||||
{
|
||||
if (m->queue_mode == DM_TYPE_BIO_BASED)
|
||||
queue_work(kmultipathd, &m->process_queued_bios);
|
||||
}
|
||||
|
||||
static void process_queued_bios(struct work_struct *work)
|
||||
{
|
||||
int r;
|
||||
unsigned long flags;
|
||||
struct bio *bio;
|
||||
struct bio_list bios;
|
||||
struct blk_plug plug;
|
||||
struct multipath *m =
|
||||
container_of(work, struct multipath, process_queued_bios);
|
||||
|
||||
bio_list_init(&bios);
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
|
||||
if (bio_list_empty(&m->queued_bios)) {
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_list_merge(&bios, &m->queued_bios);
|
||||
bio_list_init(&m->queued_bios);
|
||||
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
blk_start_plug(&plug);
|
||||
while ((bio = bio_list_pop(&bios))) {
|
||||
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
|
||||
if (r < 0 || r == DM_MAPIO_REQUEUE) {
|
||||
bio->bi_error = r;
|
||||
bio_endio(bio);
|
||||
} else if (r == DM_MAPIO_REMAPPED)
|
||||
generic_make_request(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
}
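process_queued_bios() uses a common concurrency idiom: grab the shared list under the spinlock, splice it onto a private list, drop the lock, and only then do the per-bio work (inside a block plug). A generic standalone sketch of that splice-then-process idiom using a pthread mutex and a plain linked list -- no bios or plugs involved:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item { int id; struct item *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *queued;		/* shared list, protected by lock */

/* Producer side: add an item under the lock (LIFO, for brevity). */
static void queue_item(int id)
{
	struct item *it = malloc(sizeof(*it));

	if (!it)
		return;
	it->id = id;
	pthread_mutex_lock(&lock);
	it->next = queued;
	queued = it;
	pthread_mutex_unlock(&lock);
}

/* Consumer side, shaped like process_queued_bios(): splice the whole list
 * to a private head under the lock, then do the real work unlocked so
 * submitters are never blocked behind the processing. */
static void process_queued_items(void)
{
	struct item *local, *it;

	pthread_mutex_lock(&lock);
	local = queued;
	queued = NULL;
	pthread_mutex_unlock(&lock);

	while ((it = local)) {
		local = it->next;
		printf("processing item %d\n", it->id);
		free(it);
	}
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		queue_item(i);
	process_queued_items();
	return 0;
}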
|
||||
|
||||
/*
|
||||
* If we run out of usable paths, should we queue I/O or error it?
|
||||
*/
|
||||
@ -557,8 +736,10 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
|
||||
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
if (!queue_if_no_path)
|
||||
if (!queue_if_no_path) {
|
||||
dm_table_run_md_queue_async(m->ti->table);
|
||||
process_queued_bios_list(m);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -798,6 +979,12 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
|
||||
if (!hw_argc)
|
||||
return 0;
|
||||
|
||||
if (m->queue_mode == DM_TYPE_BIO_BASED) {
|
||||
dm_consume_args(as, hw_argc);
|
||||
DMERR("bio-based multipath doesn't allow hardware handler args");
|
||||
return 0;
|
||||
}
|
||||
|
||||
m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
|
||||
|
||||
if (hw_argc > 1) {
|
||||
@ -833,7 +1020,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
|
||||
const char *arg_name;
|
||||
|
||||
static struct dm_arg _args[] = {
|
||||
{0, 6, "invalid number of feature args"},
|
||||
{0, 8, "invalid number of feature args"},
|
||||
{1, 50, "pg_init_retries must be between 1 and 50"},
|
||||
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
|
||||
};
|
||||
@ -873,6 +1060,24 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcasecmp(arg_name, "queue_mode") &&
|
||||
(argc >= 1)) {
|
||||
const char *queue_mode_name = dm_shift_arg(as);
|
||||
|
||||
if (!strcasecmp(queue_mode_name, "bio"))
|
||||
m->queue_mode = DM_TYPE_BIO_BASED;
|
||||
else if (!strcasecmp(queue_mode_name, "rq"))
|
||||
m->queue_mode = DM_TYPE_REQUEST_BASED;
|
||||
else if (!strcasecmp(queue_mode_name, "mq"))
|
||||
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
|
||||
else {
|
||||
ti->error = "Unknown 'queue_mode' requested";
|
||||
r = -EINVAL;
|
||||
}
|
||||
argc--;
|
||||
continue;
|
||||
}
|
||||
|
||||
ti->error = "Unrecognised multipath feature request";
|
||||
r = -EINVAL;
|
||||
} while (argc && !r);
|
||||
@ -880,8 +1085,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
|
||||
return r;
|
||||
}
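The queue_mode feature parsing above consumes one extra argument and maps "bio", "rq" or "mq" onto the table type the target will request in alloc_multipath_stage2(). A small standalone sketch of just that string-to-type mapping (strcasecmp(), as in the target; the enum is a stand-in):

#include <stdio.h>
#include <strings.h>	/* strcasecmp() */

/* Stand-in enum; only the mapping matters here. */
enum dm_type {
	DM_TYPE_NONE,
	DM_TYPE_BIO_BASED,
	DM_TYPE_REQUEST_BASED,
	DM_TYPE_MQ_REQUEST_BASED,
};

/* Mirrors the queue_mode branch of parse_features(). */
static int parse_queue_mode(const char *name, enum dm_type *mode)
{
	if (!strcasecmp(name, "bio"))
		*mode = DM_TYPE_BIO_BASED;
	else if (!strcasecmp(name, "rq"))
		*mode = DM_TYPE_REQUEST_BASED;
	else if (!strcasecmp(name, "mq"))
		*mode = DM_TYPE_MQ_REQUEST_BASED;
	else
		return -1;	/* "Unknown 'queue_mode' requested" */
	return 0;
}

int main(void)
{
	const char *args[] = { "bio", "RQ", "mq", "bogus" };

	for (int i = 0; i < 4; i++) {
		enum dm_type mode = DM_TYPE_NONE;
		int r = parse_queue_mode(args[i], &mode);

		printf("%-5s -> r=%d mode=%d\n", args[i], r, mode);
	}
	return 0;
}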
|
||||
|
||||
static int multipath_ctr(struct dm_target *ti, unsigned int argc,
|
||||
char **argv)
|
||||
static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
{
|
||||
/* target arguments */
|
||||
static struct dm_arg _args[] = {
|
||||
@ -894,12 +1098,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
|
||||
struct dm_arg_set as;
|
||||
unsigned pg_count = 0;
|
||||
unsigned next_pg_num;
|
||||
bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
|
||||
|
||||
as.argc = argc;
|
||||
as.argv = argv;
|
||||
|
||||
m = alloc_multipath(ti, use_blk_mq);
|
||||
m = alloc_multipath(ti);
|
||||
if (!m) {
|
||||
ti->error = "can't allocate multipath";
|
||||
return -EINVAL;
|
||||
@ -909,6 +1112,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
|
||||
if (r)
|
||||
goto bad;
|
||||
|
||||
r = alloc_multipath_stage2(ti, m);
|
||||
if (r)
|
||||
goto bad;
|
||||
|
||||
r = parse_hw_handler(&as, m);
|
||||
if (r)
|
||||
goto bad;
|
||||
@ -958,7 +1165,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
|
||||
ti->num_flush_bios = 1;
|
||||
ti->num_discard_bios = 1;
|
||||
ti->num_write_same_bios = 1;
|
||||
if (use_blk_mq)
|
||||
if (m->queue_mode == DM_TYPE_BIO_BASED)
|
||||
ti->per_io_data_size = multipath_per_bio_data_size();
|
||||
else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
|
||||
ti->per_io_data_size = sizeof(struct dm_mpath_io);
|
||||
|
||||
return 0;
|
||||
@ -1083,8 +1292,10 @@ static int reinstate_path(struct pgpath *pgpath)
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
if (run_queue)
|
||||
if (run_queue) {
|
||||
dm_table_run_md_queue_async(m->ti->table);
|
||||
process_queued_bios_list(m);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -1281,6 +1492,8 @@ static void pg_init_done(void *data, int errors)
|
||||
}
|
||||
clear_bit(MPATHF_QUEUE_IO, &m->flags);
|
||||
|
||||
process_queued_bios_list(m);
|
||||
|
||||
/*
|
||||
* Wake up any thread waiting to suspend.
|
||||
*/
|
||||
@ -1328,7 +1541,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
|
||||
* during end I/O handling, since those clone requests don't have
|
||||
* bio clones. If we queue them inside the multipath target,
|
||||
* we need to make bio clones, that requires memory allocation.
|
||||
* (See drivers/md/dm.c:end_clone_bio() about why the clone requests
|
||||
* (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
|
||||
* don't have bio clones.)
|
||||
* Instead of queueing the clone request here, we queue the original
|
||||
* request into dm core, which will remake a clone request and
|
||||
@ -1347,7 +1560,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
|
||||
|
||||
if (!atomic_read(&m->nr_valid_paths)) {
|
||||
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
||||
if (!must_push_back(m))
|
||||
if (!must_push_back_rq(m))
|
||||
r = -EIO;
|
||||
} else {
|
||||
if (error == -EBADE)
|
||||
@ -1381,6 +1594,64 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int do_end_io_bio(struct multipath *m, struct bio *clone,
|
||||
int error, struct dm_mpath_io *mpio)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!error)
|
||||
return 0; /* I/O complete */
|
||||
|
||||
if (noretry_error(error))
|
||||
return error;
|
||||
|
||||
if (mpio->pgpath)
|
||||
fail_path(mpio->pgpath);
|
||||
|
||||
if (!atomic_read(&m->nr_valid_paths)) {
|
||||
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
||||
if (!must_push_back_bio(m))
|
||||
return -EIO;
|
||||
return DM_ENDIO_REQUEUE;
|
||||
} else {
|
||||
if (error == -EBADE)
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
/* Queue for the daemon to resubmit */
|
||||
dm_bio_restore(get_bio_details_from_bio(clone), clone);
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
bio_list_add(&m->queued_bios, clone);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
|
||||
queue_work(kmultipathd, &m->process_queued_bios);
|
||||
|
||||
return DM_ENDIO_INCOMPLETE;
|
||||
}
|
||||
|
||||
static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
|
||||
{
|
||||
struct multipath *m = ti->private;
|
||||
struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
|
||||
struct pgpath *pgpath;
|
||||
struct path_selector *ps;
|
||||
int r;
|
||||
|
||||
BUG_ON(!mpio);
|
||||
|
||||
r = do_end_io_bio(m, clone, error, mpio);
|
||||
pgpath = mpio->pgpath;
|
||||
if (pgpath) {
|
||||
ps = &pgpath->pg->ps;
|
||||
if (ps->type->end_io)
|
||||
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Suspend can't complete until all the I/O is processed so if
|
||||
* the last path fails we must error any remaining I/O.
|
||||
@ -1454,7 +1725,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
|
||||
DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
|
||||
(m->pg_init_retries > 0) * 2 +
|
||||
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
|
||||
test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
|
||||
test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
|
||||
(m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
|
||||
|
||||
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
DMEMIT("queue_if_no_path ");
|
||||
if (m->pg_init_retries)
|
||||
@ -1463,6 +1736,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
|
||||
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
|
||||
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
|
||||
DMEMIT("retain_attached_hw_handler ");
|
||||
if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
|
||||
switch(m->queue_mode) {
|
||||
case DM_TYPE_BIO_BASED:
|
||||
DMEMIT("queue_mode bio ");
|
||||
break;
|
||||
case DM_TYPE_MQ_REQUEST_BASED:
|
||||
DMEMIT("queue_mode mq ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!m->hw_handler_name || type == STATUSTYPE_INFO)
|
||||
@ -1642,6 +1925,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
|
||||
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
|
||||
pg_init_all_paths(m);
|
||||
dm_table_run_md_queue_async(m->ti->table);
|
||||
process_queued_bios_list(m);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1748,7 +2032,7 @@ static int multipath_busy(struct dm_target *ti)
|
||||
*---------------------------------------------------------------*/
|
||||
static struct target_type multipath_target = {
|
||||
.name = "multipath",
|
||||
.version = {1, 11, 0},
|
||||
.version = {1, 12, 0},
|
||||
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
|
||||
.module = THIS_MODULE,
|
||||
.ctr = multipath_ctr,
|
||||
@ -1757,6 +2041,8 @@ static struct target_type multipath_target = {
|
||||
.clone_and_map_rq = multipath_clone_and_map,
|
||||
.release_clone_rq = multipath_release_clone,
|
||||
.rq_end_io = multipath_end_io,
|
||||
.map = multipath_map_bio,
|
||||
.end_io = multipath_end_io_bio,
|
||||
.presuspend = multipath_presuspend,
|
||||
.postsuspend = multipath_postsuspend,
|
||||
.resume = multipath_resume,
|
||||
@ -1771,14 +2057,14 @@ static int __init dm_multipath_init(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
/* allocate a slab for the dm_ios */
|
||||
/* allocate a slab for the dm_mpath_ios */
|
||||
_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
|
||||
if (!_mpio_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
r = dm_register_target(&multipath_target);
|
||||
if (r < 0) {
|
||||
DMERR("register failed %d", r);
|
||||
DMERR("request-based register failed %d", r);
|
||||
r = -EINVAL;
|
||||
goto bad_register_target;
|
||||
}
|
||||
@ -1804,10 +2090,6 @@ static int __init dm_multipath_init(void)
|
||||
goto bad_alloc_kmpath_handlerd;
|
||||
}
|
||||
|
||||
DMINFO("version %u.%u.%u loaded",
|
||||
multipath_target.version[0], multipath_target.version[1],
|
||||
multipath_target.version[2]);
|
||||
|
||||
return 0;
|
||||
|
||||
bad_alloc_kmpath_handlerd:
|
||||
|
drivers/md/dm-raid.c (3101 lines changed; diff suppressed because it is too large)

drivers/md/dm-rq.c (new file, 970 lines)
@@ -0,0 +1,970 @@
|
||||
/*
|
||||
* Copyright (C) 2016 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-core.h"
|
||||
#include "dm-rq.h"
|
||||
|
||||
#include <linux/elevator.h> /* for rq_end_sector() */
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#define DM_MSG_PREFIX "core-rq"
|
||||
|
||||
#define DM_MQ_NR_HW_QUEUES 1
|
||||
#define DM_MQ_QUEUE_DEPTH 2048
|
||||
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
|
||||
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
|
||||
|
||||
/*
|
||||
* Request-based DM's mempools' reserved IOs set by the user.
|
||||
*/
|
||||
#define RESERVED_REQUEST_BASED_IOS 256
|
||||
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
|
||||
|
||||
#ifdef CONFIG_DM_MQ_DEFAULT
|
||||
static bool use_blk_mq = true;
|
||||
#else
|
||||
static bool use_blk_mq = false;
|
||||
#endif
|
||||
|
||||
bool dm_use_blk_mq_default(void)
|
||||
{
|
||||
return use_blk_mq;
|
||||
}
|
||||
|
||||
bool dm_use_blk_mq(struct mapped_device *md)
|
||||
{
|
||||
return md->use_blk_mq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_use_blk_mq);
|
||||
|
||||
unsigned dm_get_reserved_rq_based_ios(void)
|
||||
{
|
||||
return __dm_get_module_param(&reserved_rq_based_ios,
|
||||
RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
|
||||
|
||||
static unsigned dm_get_blk_mq_nr_hw_queues(void)
|
||||
{
|
||||
return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
|
||||
}
|
||||
|
||||
static unsigned dm_get_blk_mq_queue_depth(void)
|
||||
{
|
||||
return __dm_get_module_param(&dm_mq_queue_depth,
|
||||
DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
|
||||
}
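dm_get_reserved_rq_based_ios() and the two blk-mq getters all go through __dm_get_module_param(), whose behaviour -- judging from the (parameter, default, maximum) call sites -- is to substitute the default for 0 and clamp anything above the maximum. A standalone approximation of that helper; treat the exact semantics as inferred rather than quoted, and note the kernel version also writes the corrected value back to the module parameter:

#include <stdio.h>

#define RESERVED_REQUEST_BASED_IOS	256	/* default, as in dm-rq.c */
#define DM_RESERVED_MAX_IOS		1024	/* cap, as in dm-core.h */

/* Approximation of __dm_get_module_param(): 0 means "use the default",
 * anything above max is clamped.  This sketch does not write back. */
static unsigned get_module_param(unsigned value, unsigned def, unsigned max)
{
	if (!value)
		return def;
	if (value > max)
		return max;
	return value;
}

int main(void)
{
	printf("%u\n", get_module_param(0, RESERVED_REQUEST_BASED_IOS,
					DM_RESERVED_MAX_IOS));	/* 256  */
	printf("%u\n", get_module_param(4096, RESERVED_REQUEST_BASED_IOS,
					DM_RESERVED_MAX_IOS));	/* 1024 */
	printf("%u\n", get_module_param(512, RESERVED_REQUEST_BASED_IOS,
					DM_RESERVED_MAX_IOS));	/* 512  */
	return 0;
}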
|
||||
|
||||
int dm_request_based(struct mapped_device *md)
|
||||
{
|
||||
return blk_queue_stackable(md->queue);
|
||||
}
|
||||
|
||||
static void dm_old_start_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
if (blk_queue_stopped(q))
|
||||
blk_start_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
void dm_start_queue(struct request_queue *q)
|
||||
{
|
||||
if (!q->mq_ops)
|
||||
dm_old_start_queue(q);
|
||||
else {
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
blk_mq_kick_requeue_list(q);
|
||||
}
|
||||
}
|
||||
|
||||
static void dm_old_stop_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
if (blk_queue_stopped(q)) {
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
blk_stop_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
void dm_stop_queue(struct request_queue *q)
|
||||
{
|
||||
if (!q->mq_ops)
|
||||
dm_old_stop_queue(q);
|
||||
else
|
||||
blk_mq_stop_hw_queues(q);
|
||||
}
|
||||
|
||||
static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
return mempool_alloc(md->io_pool, gfp_mask);
|
||||
}
|
||||
|
||||
static void free_old_rq_tio(struct dm_rq_target_io *tio)
|
||||
{
|
||||
mempool_free(tio, tio->md->io_pool);
|
||||
}
|
||||
|
||||
static struct request *alloc_old_clone_request(struct mapped_device *md,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
return mempool_alloc(md->rq_pool, gfp_mask);
|
||||
}
|
||||
|
||||
static void free_old_clone_request(struct mapped_device *md, struct request *rq)
|
||||
{
|
||||
mempool_free(rq, md->rq_pool);
|
||||
}
|
||||
|
||||
/*
|
||||
* Partial completion handling for request-based dm
|
||||
*/
|
||||
static void end_clone_bio(struct bio *clone)
|
||||
{
|
||||
struct dm_rq_clone_bio_info *info =
|
||||
container_of(clone, struct dm_rq_clone_bio_info, clone);
|
||||
struct dm_rq_target_io *tio = info->tio;
|
||||
struct bio *bio = info->orig;
|
||||
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
|
||||
int error = clone->bi_error;
|
||||
|
||||
bio_put(clone);
|
||||
|
||||
if (tio->error)
|
||||
/*
|
||||
* An error has already been detected on the request.
|
||||
* Once error occurred, just let clone->end_io() handle
|
||||
* the remainder.
|
||||
*/
|
||||
return;
|
||||
else if (error) {
|
||||
/*
|
||||
* Don't report the error to the upper layer yet.
|
||||
* The error handling decision is made by the target driver,
|
||||
* when the request is completed.
|
||||
*/
|
||||
tio->error = error;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O for the bio successfully completed.
|
||||
* Notice the data completion to the upper layer.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bios are processed from the head of the list.
|
||||
* So the completing bio should always be rq->bio.
|
||||
* If it's not, something wrong is happening.
|
||||
*/
|
||||
if (tio->orig->bio != bio)
|
||||
DMERR("bio completion is going in the middle of the request");
|
||||
|
||||
/*
|
||||
* Update the original request.
|
||||
* Do not use blk_end_request() here, because it may complete
|
||||
* the original request before the clone, and break the ordering.
|
||||
*/
|
||||
blk_update_request(tio->orig, 0, nr_bytes);
|
||||
}
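end_clone_bio() implements partial completion: each successfully finished clone bio advances the original request by its byte count via blk_update_request(), while an error is only recorded in tio->error and left for the final completion path to resolve. A simplified standalone model of that accounting (no block layer, just a byte counter and a sticky error):

#include <stdio.h>

struct orig_request {
	unsigned remaining;	/* bytes the original request still covers */
	int error;		/* first error seen; resolved at completion */
};

/* Model of end_clone_bio(): on success advance the original request by
 * nr_bytes (blk_update_request() in the real code); on error just park
 * the error and let the final completion path deal with it. */
static void clone_bio_done(struct orig_request *rq, unsigned nr_bytes, int error)
{
	if (rq->error)
		return;			/* already failed, nothing more to track */
	if (error) {
		rq->error = error;
		return;
	}
	rq->remaining -= nr_bytes < rq->remaining ? nr_bytes : rq->remaining;
	printf("completed %u bytes, %u still outstanding\n", nr_bytes, rq->remaining);
}

int main(void)
{
	struct orig_request rq = { .remaining = 12288, .error = 0 };

	clone_bio_done(&rq, 4096, 0);
	clone_bio_done(&rq, 4096, 0);
	clone_bio_done(&rq, 4096, -5);	/* -EIO on the last chunk */
	printf("final: remaining=%u error=%d\n", rq.remaining, rq.error);
	return 0;
}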
|
||||
|
||||
static struct dm_rq_target_io *tio_from_request(struct request *rq)
|
||||
{
|
||||
return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
|
||||
}
|
||||
|
||||
static void rq_end_stats(struct mapped_device *md, struct request *orig)
|
||||
{
|
||||
if (unlikely(dm_stats_used(&md->stats))) {
|
||||
struct dm_rq_target_io *tio = tio_from_request(orig);
|
||||
tio->duration_jiffies = jiffies - tio->duration_jiffies;
|
||||
dm_stats_account_io(&md->stats, rq_data_dir(orig),
|
||||
blk_rq_pos(orig), tio->n_sectors, true,
|
||||
tio->duration_jiffies, &tio->stats_aux);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't touch any member of the md after calling this function because
|
||||
* the md may be freed in dm_put() at the end of this function.
|
||||
* Or do dm_get() before calling this function and dm_put() later.
|
||||
*/
|
||||
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
{
|
||||
atomic_dec(&md->pending[rw]);
|
||||
|
||||
/* nudge anyone waiting on suspend queue */
|
||||
if (!md_in_flight(md))
|
||||
wake_up(&md->wait);
|
||||
|
||||
/*
|
||||
* Run this off this callpath, as drivers could invoke end_io while
|
||||
* inside their request_fn (and holding the queue lock). Calling
|
||||
* back into ->request_fn() could deadlock attempting to grab the
|
||||
* queue lock again.
|
||||
*/
|
||||
if (!md->queue->mq_ops && run_queue)
|
||||
blk_run_queue_async(md->queue);
|
||||
|
||||
/*
|
||||
* dm_put() must be at the end of this function. See the comment above
|
||||
*/
|
||||
dm_put(md);
|
||||
}
|
||||
|
||||
static void free_rq_clone(struct request *clone)
|
||||
{
|
||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||
struct mapped_device *md = tio->md;
|
||||
|
||||
blk_rq_unprep_clone(clone);
|
||||
|
||||
/*
|
||||
* It is possible for a clone_old_rq() allocated clone to
|
||||
* get passed in -- it may not yet have a request_queue.
|
||||
* This is known to occur if the error target replaces
|
||||
* a multipath target that has a request_fn queue stacked
|
||||
* on blk-mq queue(s).
|
||||
*/
|
||||
if (clone->q && clone->q->mq_ops)
|
||||
/* stacked on blk-mq queue(s) */
|
||||
tio->ti->type->release_clone_rq(clone);
|
||||
else if (!md->queue->mq_ops)
|
||||
/* request_fn queue stacked on request_fn queue(s) */
|
||||
free_old_clone_request(md, clone);
|
||||
|
||||
if (!md->queue->mq_ops)
|
||||
free_old_rq_tio(tio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete the clone and the original request.
|
||||
* Must be called without clone's queue lock held,
|
||||
* see end_clone_request() for more details.
|
||||
*/
|
||||
static void dm_end_request(struct request *clone, int error)
|
||||
{
|
||||
int rw = rq_data_dir(clone);
|
||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||
struct mapped_device *md = tio->md;
|
||||
struct request *rq = tio->orig;
|
||||
|
||||
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
|
||||
rq->errors = clone->errors;
|
||||
rq->resid_len = clone->resid_len;
|
||||
|
||||
if (rq->sense)
|
||||
/*
|
||||
* We are using the sense buffer of the original
|
||||
* request.
|
||||
* So setting the length of the sense data is enough.
|
||||
*/
|
||||
rq->sense_len = clone->sense_len;
|
||||
}
|
||||
|
||||
free_rq_clone(clone);
|
||||
rq_end_stats(md, rq);
|
||||
if (!rq->q->mq_ops)
|
||||
blk_end_request_all(rq, error);
|
||||
else
|
||||
blk_mq_end_request(rq, error);
|
||||
rq_completed(md, rw, true);
|
||||
}
|
||||
|
||||
static void dm_unprep_request(struct request *rq)
|
||||
{
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
struct request *clone = tio->clone;
|
||||
|
||||
if (!rq->q->mq_ops) {
|
||||
rq->special = NULL;
|
||||
rq->cmd_flags &= ~REQ_DONTPREP;
|
||||
}
|
||||
|
||||
if (clone)
|
||||
free_rq_clone(clone);
|
||||
else if (!tio->md->queue->mq_ops)
|
||||
free_old_rq_tio(tio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Requeue the original request of a clone.
|
||||
*/
|
||||
static void dm_old_requeue_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
blk_requeue_request(q, rq);
|
||||
blk_run_queue_async(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void dm_mq_requeue_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned long flags;
|
||||
|
||||
blk_mq_requeue_request(rq);
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
if (!blk_queue_stopped(q))
|
||||
blk_mq_kick_requeue_list(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void dm_requeue_original_request(struct mapped_device *md,
|
||||
struct request *rq)
|
||||
{
|
||||
int rw = rq_data_dir(rq);
|
||||
|
||||
rq_end_stats(md, rq);
|
||||
dm_unprep_request(rq);
|
||||
|
||||
if (!rq->q->mq_ops)
|
||||
dm_old_requeue_request(rq);
|
||||
else
|
||||
dm_mq_requeue_request(rq);
|
||||
|
||||
rq_completed(md, rw, false);
|
||||
}
|
||||
|
||||
static void dm_done(struct request *clone, int error, bool mapped)
|
||||
{
|
||||
int r = error;
|
||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||
dm_request_endio_fn rq_end_io = NULL;
|
||||
|
||||
if (tio->ti) {
|
||||
rq_end_io = tio->ti->type->rq_end_io;
|
||||
|
||||
if (mapped && rq_end_io)
|
||||
r = rq_end_io(tio->ti, clone, error, &tio->info);
|
||||
}
|
||||
|
||||
if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
|
||||
!clone->q->limits.max_write_same_sectors))
|
||||
disable_write_same(tio->md);
|
||||
|
||||
if (r <= 0)
|
||||
/* The target wants to complete the I/O */
|
||||
dm_end_request(clone, r);
|
||||
else if (r == DM_ENDIO_INCOMPLETE)
|
||||
/* The target will handle the I/O */
|
||||
return;
|
||||
else if (r == DM_ENDIO_REQUEUE)
|
||||
/* The target wants to requeue the I/O */
|
||||
dm_requeue_original_request(tio->md, tio->orig);
|
||||
else {
|
||||
DMWARN("unimplemented target endio return value: %d", r);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Request completion handler for request-based dm
|
||||
*/
|
||||
static void dm_softirq_done(struct request *rq)
|
||||
{
|
||||
bool mapped = true;
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
struct request *clone = tio->clone;
|
||||
int rw;
|
||||
|
||||
if (!clone) {
|
||||
rq_end_stats(tio->md, rq);
|
||||
rw = rq_data_dir(rq);
|
||||
if (!rq->q->mq_ops) {
|
||||
blk_end_request_all(rq, tio->error);
|
||||
rq_completed(tio->md, rw, false);
|
||||
free_old_rq_tio(tio);
|
||||
} else {
|
||||
blk_mq_end_request(rq, tio->error);
|
||||
rq_completed(tio->md, rw, false);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (rq->cmd_flags & REQ_FAILED)
|
||||
mapped = false;
|
||||
|
||||
dm_done(clone, tio->error, mapped);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete the clone and the original request with the error status
|
||||
* through softirq context.
|
||||
*/
|
||||
static void dm_complete_request(struct request *rq, int error)
|
||||
{
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
|
||||
tio->error = error;
|
||||
if (!rq->q->mq_ops)
|
||||
blk_complete_request(rq);
|
||||
else
|
||||
blk_mq_complete_request(rq, error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete the not-mapped clone and the original request with the error status
|
||||
* through softirq context.
|
||||
* Target's rq_end_io() function isn't called.
|
||||
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
|
||||
*/
|
||||
static void dm_kill_unmapped_request(struct request *rq, int error)
|
||||
{
|
||||
rq->cmd_flags |= REQ_FAILED;
|
||||
dm_complete_request(rq, error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with the clone's queue lock held (in the case of .request_fn)
|
||||
*/
|
||||
static void end_clone_request(struct request *clone, int error)
|
||||
{
|
||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||
|
||||
if (!clone->q->mq_ops) {
|
||||
/*
|
||||
* For just cleaning up the information of the queue in which
|
||||
* the clone was dispatched.
|
||||
* The clone is *NOT* actually freed here because it is allocated
* from dm's own mempool (REQ_ALLOCED isn't set).
|
||||
*/
|
||||
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock. Otherwise, deadlock could occur because:
	 *  - another request may be submitted by the upper level driver
	 *    of the stacking during the completion
	 *  - the submission which requires queue lock may be done
	 *    against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	/*
	 * Create clone for use with .request_fn request_queue
	 */
	struct request *clone;

	clone = alloc_old_clone_request(md, gfp_mask);
	if (!clone)
		return NULL;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		free_old_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	/*
	 * Avoid initializing info for blk-mq; it passes
	 * target-specific data through info.ptr
	 * (see: dm_mq_init_request)
	 */
	if (!md->init_tio_pdu)
		memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		init_kthread_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
					       struct mapped_device *md,
					       gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_old_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	/*
	 * Must clone a request if this .request_fn DM device
	 * is stacked on .request_fn device(s).
	 */
	if (!dm_table_all_blk_mq_devices(table)) {
		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_old_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
		       struct mapped_device *md)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r != DM_MAPIO_REMAPPED)
			return r;
		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(md, tio->orig);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
		return r;
	}

	return 0;
}
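
map_request() above relies on two target hooks: .map_rq when core has already allocated the clone (the old .request_fn path), and .clone_and_map_rq when the target allocates the clone itself (the blk-mq and multipath path). Purely as a rough illustration of the contract the dispatcher expects, and not code from this series (my_tgt_pick_bdev is a made-up helper standing in for the target's path selection), a minimal .clone_and_map_rq could look like this:

static int my_tgt_clone_and_map_rq(struct dm_target *ti, struct request *rq,
				   union map_info *map_context,
				   struct request **clone)
{
	/* my_tgt_pick_bdev() is a hypothetical helper choosing an underlying device */
	struct block_device *bdev = my_tgt_pick_bdev(ti);
	struct request *c = blk_get_request(bdev_get_queue(bdev),
					    rq_data_dir(rq), GFP_ATOMIC);

	if (IS_ERR(c))
		return DM_MAPIO_REQUEUE;	/* map_request() requeues the original */

	*clone = c;
	return DM_MAPIO_REMAPPED;		/* map_request() runs setup_clone() and dispatches */
}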

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, false, 0,
				    &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
	struct request *rq = tio->orig;
	struct mapped_device *md = tio->md;

	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
		dm_requeue_original_request(md, rq);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for old request-based dm.
 * Called with the queue lock held.
 */
static void dm_old_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_target *ti = md->immutable_target;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos = 0;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, pos);
		dm_put_live_table(md, srcu_idx);
	}

	/*
	 * For suspend, check blk_queue_stopped() and increment
	 * ->pending within a single queue_lock not to increment the
	 * number of in-flight I/Os after the queue is stopped in
	 * dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			return;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (req_op(rq) != REQ_OP_FLUSH)
			pos = blk_rq_pos(rq);

		if ((dm_old_request_peeked_before_merge_deadline(md) &&
		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
		    (ti->type->busy && ti->type->busy(ti))) {
			blk_delay_queue(q, 10);
			return;
		}

		dm_start_request(md, rq);

		tio = tio_from_request(rq);
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}
}
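
The dispatch-delay test inside dm_old_request_fn() packs the sequential-merge heuristic and the target busy check into a single condition. Purely as a reading aid, and not part of this diff, it is equivalent to a helper along these lines, with md, rq, ti and pos exactly as used in the loop above:

static bool dm_old_delay_dispatch(struct mapped_device *md, struct request *rq,
				  struct dm_target *ti, sector_t pos)
{
	/*
	 * The sequential-merge window is still open and rq looks mergeable:
	 * a single-bio request starting where the last one ended, same direction.
	 */
	if (dm_old_request_peeked_before_merge_deadline(md) &&
	    md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
	    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
		return true;

	/* Otherwise only back off when the target itself reports congestion. */
	return ti->type->busy && ti->type->busy(ti);
}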

/*
 * Fully initialize a .request_fn request-based queue.
 */
int dm_old_init_request_queue(struct mapped_device *md)
{
	/* Fully initialize the queue */
	if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
		return -EINVAL;

	/* disable dm_old_request_fn's merge heuristic by default */
	md->seq_rq_merge_deadline_usecs = 0;

	dm_init_normal_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_old_prep_fn);

	/* Initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
	if (IS_ERR(md->kworker_task))
		return PTR_ERR(md->kworker_task);

	elv_register_queue(md->queue);

	return 0;
}

static int dm_mq_init_request(void *data, struct request *rq,
			      unsigned int hctx_idx, unsigned int request_idx,
			      unsigned int numa_node)
{
	struct mapped_device *md = data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	if (md->init_tio_pdu) {
		/* target-specific per-io data is immediately after the tio */
		tio->info.ptr = tio + 1;
	}

	return 0;
}

static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	struct dm_target *ti = md->immutable_target;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, 0);
		dm_put_live_table(md, srcu_idx);
	}

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before calling map_request().
	 */
	tio->ti = ti;

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before requeuing */
		rq_end_stats(md, rq);
		rq_completed(md, rq_data_dir(rq), false);
		return BLK_MQ_RQ_QUEUE_BUSY;
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.map_queue = blk_mq_map_queue,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q;
	struct dm_target *immutable_tgt;
	int err;

	if (!dm_table_all_blk_mq_devices(t)) {
		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
		return -EINVAL;
	}

	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
	if (!md->tag_set)
		return -ENOMEM;

	md->tag_set->ops = &dm_mq_ops;
	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
	md->tag_set->numa_node = md->numa_node_id;
	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
	md->tag_set->driver_data = md;

	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
	immutable_tgt = dm_table_get_immutable_target(t);
	if (immutable_tgt && immutable_tgt->per_io_data_size) {
		/* any target-specific per-io data is immediately after the tio */
		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
		md->init_tio_pdu = true;
	}

	err = blk_mq_alloc_tag_set(md->tag_set);
	if (err)
		goto out_kfree_tag_set;

	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	dm_init_md_queue(md);

	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
	blk_mq_register_disk(md->disk);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
	kfree(md->tag_set);

	return err;
}

void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
	if (md->tag_set) {
		blk_mq_free_tag_set(md->tag_set);
		kfree(md->tag_set);
	}
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");

drivers/md/dm-rq.h (new file, 64 lines)
@@ -0,0 +1,64 @@
/*
 * Internal header file for device mapper
 *
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the LGPL.
 */

#ifndef DM_RQ_INTERNAL_H
#define DM_RQ_INTERNAL_H

#include <linux/bio.h>
#include <linux/kthread.h>

#include "dm-stats.h"

struct mapped_device;

/*
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, *clone;
	struct kthread_work work;
	int error;
	union map_info info;
	struct dm_stats_aux stats_aux;
	unsigned long duration_jiffies;
	unsigned n_sectors;
};

/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
 * the bioset is created - this means the bio has to come at the end of the
 * struct.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
	struct bio clone;
};

bool dm_use_blk_mq_default(void);
bool dm_use_blk_mq(struct mapped_device *md);

int dm_old_init_request_queue(struct mapped_device *md);
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
void dm_mq_cleanup_mapped_device(struct mapped_device *md);

void dm_start_queue(struct request_queue *q);
void dm_stop_queue(struct request_queue *q);

unsigned dm_get_reserved_rq_based_ios(void);

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count);

#endif
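
The comment above struct dm_rq_clone_bio_info is the key to how clone bios are allocated: the bioset is created with a front_pad large enough to hold the wrapper, so the bio must be the struct's last member and container_of() can recover the wrapper from the bio pointer, exactly as dm_rq_bio_constructor() does in dm-rq.c earlier in this diff. A minimal sketch of that pattern, with an illustrative pool size, no cleanup paths, and an invented function name:

static struct dm_rq_clone_bio_info *example_alloc_clone_info(gfp_t gfp)
{
	/* front_pad reserves room for the wrapper in front of every bio */
	struct bio_set *bs = bioset_create(256, offsetof(struct dm_rq_clone_bio_info, clone));
	struct bio *clone;

	if (!bs)
		return NULL;

	clone = bio_alloc_bioset(gfp, 1, bs);
	if (!clone)
		return NULL;

	/* recover the wrapper from the embedded bio */
	return container_of(clone, struct dm_rq_clone_bio_info, clone);
}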

drivers/md/dm-snap.c
@@ -2302,6 +2302,13 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
	return do_origin(o->dev, bio);
}

static long origin_direct_access(struct dm_target *ti, sector_t sector,
				 void __pmem **kaddr, pfn_t *pfn, long size)
{
	DMWARN("device does not support dax.");
	return -EIO;
}

/*
 * Set the target "max_io_len" field to the minimum of all the snapshots'
 * chunk sizes.
@@ -2361,6 +2368,7 @@ static struct target_type origin_target = {
	.postsuspend = origin_postsuspend,
	.status = origin_status,
	.iterate_devices = origin_iterate_devices,
	.direct_access = origin_direct_access,
};

static struct target_type snapshot_target = {

drivers/md/dm-stats.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/device-mapper.h>

#include "dm.h"
#include "dm-core.h"
#include "dm-stats.h"

#define DM_MSG_PREFIX "stats"

drivers/md/dm-stripe.c
@@ -308,6 +308,29 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
	return DM_MAPIO_REMAPPED;
}

static long stripe_direct_access(struct dm_target *ti, sector_t sector,
				 void __pmem **kaddr, pfn_t *pfn, long size)
{
	struct stripe_c *sc = ti->private;
	uint32_t stripe;
	struct block_device *bdev;
	struct blk_dax_ctl dax = {
		.size = size,
	};
	long ret;

	stripe_map_sector(sc, sector, &stripe, &dax.sector);

	dax.sector += sc->stripe[stripe].physical_start;
	bdev = sc->stripe[stripe].dev->bdev;

	ret = bdev_direct_access(bdev, &dax);
	*kaddr = dax.addr;
	*pfn = dax.pfn;

	return ret;
}

/*
 * Stripe status:
 *
@@ -416,7 +439,7 @@ static void stripe_io_hints(struct dm_target *ti,

static struct target_type stripe_target = {
	.name = "striped",
	.version = {1, 5, 1},
	.version = {1, 6, 0},
	.module = THIS_MODULE,
	.ctr = stripe_ctr,
	.dtr = stripe_dtr,
@@ -425,6 +448,7 @@ static struct target_type stripe_target = {
	.status = stripe_status,
	.iterate_devices = stripe_iterate_devices,
	.io_hints = stripe_io_hints,
	.direct_access = stripe_direct_access,
};

int __init dm_stripe_init(void)

drivers/md/dm-sysfs.c
@@ -6,7 +6,8 @@

#include <linux/sysfs.h>
#include <linux/dm-ioctl.h>
#include "dm.h"
#include "dm-core.h"
#include "dm-rq.h"

struct dm_sysfs_attr {
	struct attribute attr;

drivers/md/dm-table.c
@@ -5,7 +5,7 @@
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-core.h"

#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -43,8 +43,10 @@ struct dm_table {
	struct dm_target *targets;

	struct target_type *immutable_target_type;
	unsigned integrity_supported:1;
	unsigned singleton:1;

	bool integrity_supported:1;
	bool singleton:1;
	bool all_blk_mq:1;

	/*
	 * Indicates the rw permissions for the new logical
@@ -206,6 +208,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
		return -ENOMEM;
	}

	t->type = DM_TYPE_NONE;
	t->mode = mode;
	t->md = md;
	*result = t;
@@ -703,7 +706,7 @@ int dm_table_add_target(struct dm_table *t, const char *type,
			  dm_device_name(t->md), type);
			return -EINVAL;
		}
		t->singleton = 1;
		t->singleton = true;
	}

	if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
@@ -824,22 +827,70 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
}
EXPORT_SYMBOL(dm_consume_args);

static bool __table_type_bio_based(unsigned table_type)
{
	return (table_type == DM_TYPE_BIO_BASED ||
		table_type == DM_TYPE_DAX_BIO_BASED);
}

static bool __table_type_request_based(unsigned table_type)
{
	return (table_type == DM_TYPE_REQUEST_BASED ||
		table_type == DM_TYPE_MQ_REQUEST_BASED);
}

static int dm_table_set_type(struct dm_table *t)
void dm_table_set_type(struct dm_table *t, unsigned type)
{
	t->type = type;
}
EXPORT_SYMBOL_GPL(dm_table_set_type);

static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
			       sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && blk_queue_dax(q);
}

static bool dm_table_supports_dax(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i = 0;

	/* Ensure that all targets support DAX. */
	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!ti->type->direct_access)
			return false;

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, device_supports_dax, NULL))
			return false;
	}

	return true;
}

static int dm_table_determine_type(struct dm_table *t)
{
	unsigned i;
	unsigned bio_based = 0, request_based = 0, hybrid = 0;
	bool use_blk_mq = false;
	bool verify_blk_mq = false;
	struct dm_target *tgt;
	struct dm_dev_internal *dd;
	struct list_head *devices;
	struct list_head *devices = dm_table_get_devices(t);
	unsigned live_md_type = dm_get_md_type(t->md);

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED)
			return 0;
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		goto verify_rq_based;
	}

	for (i = 0; i < t->num_targets; i++) {
		tgt = t->targets + i;
		if (dm_target_hybrid(tgt))
@@ -871,11 +922,27 @@ static int dm_table_set_type(struct dm_table *t)
	if (bio_based) {
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED))
			t->type = DM_TYPE_DAX_BIO_BASED;
		return 0;
	}

	BUG_ON(!request_based); /* No targets in this table */

	if (list_empty(devices) && __table_type_request_based(live_md_type)) {
		/* inherit live MD type */
		t->type = live_md_type;
		return 0;
	}

	/*
	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
	 * having a compatible target use dm_table_set_type.
	 */
	t->type = DM_TYPE_REQUEST_BASED;

verify_rq_based:
	/*
	 * Request-based dm supports only tables that have a single target now.
	 * To support multiple targets, request splitting support is needed,
@@ -888,7 +955,6 @@ static int dm_table_set_type(struct dm_table *t)
	}

	/* Non-request-stackable devices can't be used for request-based dm */
	devices = dm_table_get_devices(t);
	list_for_each_entry(dd, devices, list) {
		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);

@@ -899,10 +965,10 @@ static int dm_table_set_type(struct dm_table *t)
		}

		if (q->mq_ops)
			use_blk_mq = true;
			verify_blk_mq = true;
	}

	if (use_blk_mq) {
	if (verify_blk_mq) {
		/* verify _all_ devices in the table are blk-mq devices */
		list_for_each_entry(dd, devices, list)
			if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
@@ -910,14 +976,9 @@ static int dm_table_set_type(struct dm_table *t)
					  " are blk-mq request-stackable");
				return -EINVAL;
			}
		t->type = DM_TYPE_MQ_REQUEST_BASED;

	} else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
		/* inherit live MD type */
		t->type = live_md_type;

	} else
		t->type = DM_TYPE_REQUEST_BASED;
		t->all_blk_mq = true;
	}

	return 0;
}
@@ -956,14 +1017,19 @@ struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
	return NULL;
}

bool dm_table_bio_based(struct dm_table *t)
{
	return __table_type_bio_based(dm_table_get_type(t));
}

bool dm_table_request_based(struct dm_table *t)
{
	return __table_type_request_based(dm_table_get_type(t));
}

bool dm_table_mq_request_based(struct dm_table *t)
bool dm_table_all_blk_mq_devices(struct dm_table *t)
{
	return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
	return t->all_blk_mq;
}

static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
@@ -978,7 +1044,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
		return -EINVAL;
	}

	if (type == DM_TYPE_BIO_BASED)
	if (__table_type_bio_based(type))
		for (i = 0; i < t->num_targets; i++) {
			tgt = t->targets + i;
			per_io_data_size = max(per_io_data_size, tgt->per_io_data_size);
@@ -1106,7 +1172,7 @@ static int dm_table_register_integrity(struct dm_table *t)
		return 0;

	if (!integrity_profile_exists(dm_disk(md))) {
		t->integrity_supported = 1;
		t->integrity_supported = true;
		/*
		 * Register integrity profile during table load; we can do
		 * this because the final profile must match during resume.
@@ -1129,7 +1195,7 @@ static int dm_table_register_integrity(struct dm_table *t)
	}

	/* Preserve existing integrity profile */
	t->integrity_supported = 1;
	t->integrity_supported = true;
	return 0;
}

@@ -1141,9 +1207,9 @@ int dm_table_complete(struct dm_table *t)
{
	int r;

	r = dm_table_set_type(t);
	r = dm_table_determine_type(t);
	if (r) {
		DMERR("unable to set table type");
		DMERR("unable to determine table type");
		return r;
	}

drivers/md/dm-target.c
@@ -4,7 +4,7 @@
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-core.h"

#include <linux/module.h>
#include <linux/init.h>
@@ -148,9 +148,15 @@ static void io_err_release_clone_rq(struct request *clone)
{
}

static long io_err_direct_access(struct dm_target *ti, sector_t sector,
				 void __pmem **kaddr, pfn_t *pfn, long size)
{
	return -EIO;
}

static struct target_type error_target = {
	.name = "error",
	.version = {1, 4, 0},
	.version = {1, 5, 0},
	.features = DM_TARGET_WILDCARD,
	.ctr = io_err_ctr,
	.dtr = io_err_dtr,
@@ -158,6 +164,7 @@ static struct target_type error_target = {
	.map_rq = io_err_map_rq,
	.clone_and_map_rq = io_err_clone_and_map_rq,
	.release_clone_rq = io_err_release_clone_rq,
	.direct_access = io_err_direct_access,
};

int __init dm_target_init(void)

drivers/md/dm-thin-metadata.c
@@ -1677,6 +1677,36 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu
	return r;
}

int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
	int r = 0;

	down_write(&pmd->root_lock);
	for (; b != e; b++) {
		r = dm_sm_inc_block(pmd->data_sm, b);
		if (r)
			break;
	}
	up_write(&pmd->root_lock);

	return r;
}

int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
	int r = 0;

	down_write(&pmd->root_lock);
	for (; b != e; b++) {
		r = dm_sm_dec_block(pmd->data_sm, b);
		if (r)
			break;
	}
	up_write(&pmd->root_lock);

	return r;
}

bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
{
	int r;

drivers/md/dm-thin-metadata.h
@@ -197,6 +197,9 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);

int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);

int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);

/*
 * Returns -ENOSPC if the new size is too small and already allocated
 * blocks would be lost.

drivers/md/dm-thin.c
@@ -253,6 +253,7 @@ struct pool {
	struct bio_list deferred_flush_bios;
	struct list_head prepared_mappings;
	struct list_head prepared_discards;
	struct list_head prepared_discards_pt2;
	struct list_head active_thins;

	struct dm_deferred_set *shared_read_ds;
@@ -269,6 +270,7 @@ struct pool {

	process_mapping_fn process_prepared_mapping;
	process_mapping_fn process_prepared_discard;
	process_mapping_fn process_prepared_discard_pt2;

	struct dm_bio_prison_cell **cell_sort_array;
};
@@ -1001,7 +1003,8 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)

/*----------------------------------------------------------------*/

static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m)
static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m,
						   struct bio *discard_parent)
{
	/*
	 * We've already unmapped this range of blocks, but before we
@@ -1014,7 +1017,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
	struct discard_op op;

	begin_discard(&op, tc, m->bio);
	begin_discard(&op, tc, discard_parent);
	while (b != end) {
		/* find start of unmapped run */
		for (; b < end; b++) {
@@ -1049,27 +1052,100 @@ out:
	end_discard(&op, r);
}

static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
static void queue_passdown_pt2(struct dm_thin_new_mapping *m)
{
	unsigned long flags;
	struct pool *pool = m->tc->pool;

	spin_lock_irqsave(&pool->lock, flags);
	list_add_tail(&m->list, &pool->prepared_discards_pt2);
	spin_unlock_irqrestore(&pool->lock, flags);
	wake_worker(pool);
}

static void passdown_endio(struct bio *bio)
{
	/*
	 * It doesn't matter if the passdown discard failed, we still want
	 * to unmap (we ignore err).
	 */
	queue_passdown_pt2(bio->bi_private);
}

static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;
	struct bio *discard_parent;
	dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin);

	/*
	 * Only this thread allocates blocks, so we can be sure that the
	 * newly unmapped blocks will not be allocated before the end of
	 * the function.
	 */
	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
	if (r) {
		metadata_operation_failed(pool, "dm_thin_remove_range", r);
		bio_io_error(m->bio);
		cell_defer_no_holder(tc, m->cell);
		mempool_free(m, pool->mapping_pool);
		return;
	}

	discard_parent = bio_alloc(GFP_NOIO, 1);
	if (!discard_parent) {
		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
		       dm_device_name(tc->pool->pool_md));
		queue_passdown_pt2(m);

	} else {
		discard_parent->bi_end_io = passdown_endio;
		discard_parent->bi_private = m;

		if (m->maybe_shared)
			passdown_double_checking_shared_status(m, discard_parent);
		else {
			struct discard_op op;

			begin_discard(&op, tc, discard_parent);
			r = issue_discard(&op, m->data_block, data_end);
			end_discard(&op, r);
		}
	}

	/*
	 * Increment the unmapped blocks. This prevents a race between the
	 * passdown io and reallocation of freed blocks.
	 */
	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
		bio_io_error(m->bio);
		cell_defer_no_holder(tc, m->cell);
		mempool_free(m, pool->mapping_pool);
		return;
	}
}

static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;

	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
	/*
	 * The passdown has completed, so now we can decrement all those
	 * unmapped blocks.
	 */
	r = dm_pool_dec_data_range(pool->pmd, m->data_block,
				   m->data_block + (m->virt_end - m->virt_begin));
	if (r) {
		metadata_operation_failed(pool, "dm_thin_remove_range", r);
		metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
		bio_io_error(m->bio);

	} else if (m->maybe_shared) {
		passdown_double_checking_shared_status(m);

	} else {
		struct discard_op op;
		begin_discard(&op, tc, m->bio);
		r = issue_discard(&op, m->data_block,
				  m->data_block + (m->virt_end - m->virt_begin));
		end_discard(&op, r);
	}
	} else
		bio_endio(m->bio);

	cell_defer_no_holder(tc, m->cell);
	mempool_free(m, pool->mapping_pool);
@@ -2215,6 +2291,8 @@ static void do_worker(struct work_struct *ws)
	throttle_work_update(&pool->throttle);
	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
	throttle_work_update(&pool->throttle);
	process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
	throttle_work_update(&pool->throttle);
	process_deferred_bios(pool);
	throttle_work_complete(&pool->throttle);
}
@@ -2343,7 +2421,8 @@ static void set_discard_callbacks(struct pool *pool)

	if (passdown_enabled(pt)) {
		pool->process_discard_cell = process_discard_cell_passdown;
		pool->process_prepared_discard = process_prepared_discard_passdown;
		pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
		pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
	} else {
		pool->process_discard_cell = process_discard_cell_no_passdown;
		pool->process_prepared_discard = process_prepared_discard_no_passdown;
@@ -2830,6 +2909,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
	bio_list_init(&pool->deferred_flush_bios);
	INIT_LIST_HEAD(&pool->prepared_mappings);
	INIT_LIST_HEAD(&pool->prepared_discards);
	INIT_LIST_HEAD(&pool->prepared_discards_pt2);
	INIT_LIST_HEAD(&pool->active_thins);
	pool->low_water_triggered = false;
	pool->suspended = true;

drivers/md/dm-verity-fec.c
@@ -453,9 +453,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
	 */

	offset = block << v->data_dev_block_bits;

	res = offset;
	div64_u64(res, v->fec->rounds << v->data_dev_block_bits);
	res = div64_u64(offset, v->fec->rounds << v->data_dev_block_bits);

	/*
	 * The base RS block we can feed to the interleaver to find out all
drivers/md/dm.c (1241 lines changed; diff suppressed because it is too large)
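
The dm.c diff is suppressed above, but the changelog entry "dm: add infrastructure for DAX support" amounts to DM core forwarding ->direct_access to the target that owns the requested sector, through the dm_direct_access_fn hook declared in include/linux/device-mapper.h further down. The sketch below is only an approximation of that forwarding, not the suppressed code itself; it uses the dm_* helpers already visible elsewhere in this diff and omits the clamping of size to the target boundary that a real implementation needs:

static long example_dm_blk_direct_access(struct mapped_device *md, sector_t sector,
					 void __pmem **kaddr, pfn_t *pfn, long size)
{
	int srcu_idx;
	long ret = -EIO;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;

	if (map) {
		/* find the target backing this sector and forward the DAX request */
		ti = dm_table_find_target(map, sector);
		if (ti->type->direct_access)
			ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
	}
	dm_put_live_table(md, srcu_idx);

	return ret;
}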

drivers/md/dm.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/device-mapper.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/hdreg.h>
@@ -32,14 +33,6 @@
 */
#define DM_STATUS_NOFLUSH_FLAG (1 << 0)

/*
 * Type of table and mapped_device's mempool
 */
#define DM_TYPE_NONE 0
#define DM_TYPE_BIO_BASED 1
#define DM_TYPE_REQUEST_BASED 2
#define DM_TYPE_MQ_REQUEST_BASED 3

/*
 * List of devices that a metadevice uses and should open/close.
 */
@@ -75,8 +68,9 @@ unsigned dm_table_get_type(struct dm_table *t);
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
bool dm_table_mq_request_based(struct dm_table *t);
bool dm_table_all_blk_mq_devices(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);

@@ -161,16 +155,6 @@ void dm_interface_exit(void);
/*
 * sysfs interface
 */
struct dm_kobject_holder {
	struct kobject kobj;
	struct completion completion;
};

static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
}

int dm_sysfs_init(struct mapped_device *md);
void dm_sysfs_exit(struct mapped_device *md);
struct kobject *dm_kobject(struct mapped_device *md);
@@ -212,8 +196,6 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
void dm_internal_suspend(struct mapped_device *md);
void dm_internal_resume(struct mapped_device *md);

bool dm_use_blk_mq(struct mapped_device *md);

int dm_io_init(void);
void dm_io_exit(void);

@@ -228,18 +210,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
void dm_free_md_mempools(struct dm_md_mempools *pools);

/*
 * Helpers that are used by DM core
 * Various helpers
 */
unsigned dm_get_reserved_bio_based_ios(void);
unsigned dm_get_reserved_rq_based_ios(void);

static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
{
	return !maxlen || strlen(result) + 1 >= maxlen;
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count);

#endif

drivers/md/persistent-data/dm-btree.c
@@ -429,7 +429,14 @@ static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,

	if (flags & INTERNAL_NODE) {
		i = lower_bound(n, key);
		if (i < 0 || i >= nr_entries) {
		if (i < 0) {
			/*
			 * avoid early -ENODATA return when all entries are
			 * higher than the search @key.
			 */
			i = 0;
		}
		if (i >= nr_entries) {
			r = -ENODATA;
			goto out;
		}

drivers/scsi/sd.c
@@ -1619,8 +1619,7 @@ static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
		return -EOPNOTSUPP;
	return sd_pr_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00,
			old_key, new_key, 0,
			(1 << 0) /* APTPL */ |
			(1 << 2) /* ALL_TG_PT */);
			(1 << 0) /* APTPL */);
}

static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,

include/linux/device-mapper.h
@@ -19,6 +19,15 @@ struct dm_table;
struct mapped_device;
struct bio_vec;

/*
 * Type of table, mapped_device's mempool and request_queue
 */
#define DM_TYPE_NONE 0
#define DM_TYPE_BIO_BASED 1
#define DM_TYPE_REQUEST_BASED 2
#define DM_TYPE_MQ_REQUEST_BASED 3
#define DM_TYPE_DAX_BIO_BASED 4

typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;

union map_info {
@@ -116,6 +125,14 @@ typedef void (*dm_io_hints_fn) (struct dm_target *ti,
 */
typedef int (*dm_busy_fn) (struct dm_target *ti);

/*
 * Returns:
 *  < 0 : error
 * >= 0 : the number of bytes accessible at the address
 */
typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
				     void __pmem **kaddr, pfn_t *pfn, long size);

void dm_error(const char *message);

struct dm_dev {
@@ -162,6 +179,7 @@ struct target_type {
	dm_busy_fn busy;
	dm_iterate_devices_fn iterate_devices;
	dm_io_hints_fn io_hints;
	dm_direct_access_fn direct_access;

	/* For internal device-mapper use. */
	struct list_head list;
@@ -443,6 +461,14 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 */
void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);

/*
 * Target can use this to set the table's type.
 * Can only ever be called from a target's ctr.
 * Useful for "hybrid" target (supports both bio-based
 * and request-based).
 */
void dm_table_set_type(struct dm_table *t, unsigned type);

/*
 * Finally call this to make the table ready for use.
 */

include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)

#define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 34
#define DM_VERSION_MINOR 35
#define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2015-10-28)"
#define DM_VERSION_EXTRA "-ioctl (2016-06-23)"

/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */