Merge tag 'dm-4.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - The most extensive changes this cycle are the DM core improvements to
   add full blk-mq support to request-based DM.

     - disabled by default, but the user can opt in with CONFIG_DM_MQ_DEFAULT

     - depends on some blk-mq changes from Jens' for-4.1/core branch, which
       explains why this pull is built on linux-block.git

 - Update DM to use name_to_dev_t() rather than open-coding a less capable
   device parser.

     - includes a couple of small improvements to name_to_dev_t() that offer
       stricter constraints than DM's code provided

 - Improvements to the dm-cache "mq" cache replacement policy.

 - A DM crypt crypt_ctr() error path fix and an async crypto deadlock fix.

 - A small efficiency improvement for DM crypt decryption by leveraging
   immutable biovecs.

 - Add error handling modes for corrupted blocks to DM verity.

 - A new "log-writes" DM target from Josef Bacik that is meant for file
   system developers to test file system integrity at particular points in
   the life of a file system.

 - A few DM log userspace cleanups and fixes.

 - A few Documentation fixes (for thin, cache, crypt and switch).

* tag 'dm-4.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (34 commits)
  dm crypt: fix missing error code return from crypt_ctr error path
  dm crypt: fix deadlock when async crypto algorithm returns -EBUSY
  dm crypt: leverage immutable biovecs when decrypting on read
  dm crypt: update URLs to new cryptsetup project page
  dm: add log writes target
  dm table: use bool function return values of true/false not 1/0
  dm verity: add error handling modes for corrupted blocks
  dm thin: remove stale 'trim' message documentation
  dm delay: use msecs_to_jiffies for time conversion
  dm log userspace base: fix compile warning
  dm log userspace transfer: match wait_for_completion_timeout return type
  dm table: fall back to getting device using name_to_dev_t()
  init: stricter checking of major:minor root= values
  init: export name_to_dev_t and mark name argument as const
  dm: add 'use_blk_mq' module param and expose in per-device ro sysfs attr
  dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq
  dm: add full blk-mq support to request-based DM
  dm: impose configurable deadline for dm_request_fn's merge heuristic
  dm sysfs: introduce ability to add writable attributes
  dm: don't start current request if it would've merged with the previous
  ...
This commit is contained in commit afad97eee4.
@@ -23,3 +23,25 @@ Description:	Device-mapper device suspend state.
		Contains the value 1 while the device is suspended.
		Otherwise it contains 0. Read-only attribute.
Users:		util-linux, device-mapper udev rules

What:		/sys/block/dm-<num>/dm/rq_based_seq_io_merge_deadline
Date:		March 2015
KernelVersion:	4.1
Contact:	dm-devel@redhat.com
Description:	Allow control over how long a request that is a
		reasonable merge candidate can be queued on the request
		queue.  The resolution of this deadline is in
		microseconds (ranging from 1 to 100000 usecs).
		Setting this attribute to 0 (the default) will disable
		request-based DM's merge heuristic and associated extra
		accounting.  This attribute is not applicable to
		bio-based DM devices so it will only ever report 0 for
		them.

What:		/sys/block/dm-<num>/dm/use_blk_mq
Date:		March 2015
KernelVersion:	4.1
Contact:	dm-devel@redhat.com
Description:	Request-based Device-mapper blk-mq I/O path mode.
		Contains the value 1 if the device is using blk-mq.
		Otherwise it contains 0. Read-only attribute.
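For example, from a shell (a minimal sketch; dm-0 is only a placeholder device name):

	# reports 1 if the device uses the blk-mq I/O path, 0 otherwise
	cat /sys/block/dm-0/dm/use_blk_mq

	# allow reasonable merge candidates to wait up to 100 microseconds
	echo 100 > /sys/block/dm-0/dm/rq_based_seq_io_merge_deadline

	# 0 (the default) disables the merge heuristic and its extra accounting
	echo 0 > /sys/block/dm-0/dm/rq_based_seq_io_merge_deadline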
@@ -5,7 +5,7 @@ Device-Mapper's "crypt" target provides transparent encryption of block devices
using the kernel crypto API.

For a more detailed description of supported parameters see:
-http://code.google.com/p/cryptsetup/wiki/DMCrypt
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt

Parameters: <cipher> <key> <iv_offset> <device path> \
	      <offset> [<#opt_params> <opt_params>]
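A minimal sketch of how the parameter list above maps onto a dmsetup table
(the device, cipher and key below are placeholders; in practice the
cryptsetup/LUKS front end described further down is preferred):

	# table layout: 0 <size in sectors> crypt <cipher> <key> <iv_offset> <device path> <offset>
	SIZE=$(blockdev --getsz /dev/sdb)
	KEY=$(hexdump -v -e '/1 "%02x"' -n 32 /dev/urandom)   # 256-bit example key
	dmsetup create crypt0 --table "0 $SIZE crypt aes-xts-plain64 $KEY 0 /dev/sdb 0"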
@@ -80,7 +80,7 @@ Example scripts
===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
encryption with dm-crypt using the 'cryptsetup' utility, see
-http://code.google.com/p/cryptsetup/
+https://gitlab.com/cryptsetup/cryptsetup

[[
#!/bin/sh
Documentation/device-mapper/log-writes.txt (new file, 140 lines)
@@ -0,0 +1,140 @@
dm-log-writes
=============

This target takes 2 devices, one to pass all IO to normally, and one to log all
of the write operations to.  This is intended for file system developers wishing
to verify the integrity of metadata or data as the file system is written to.
There is a log_write_entry written for every WRITE request and the target is
able to take arbitrary data from userspace to insert into the log.  The data
that is in the WRITE requests is copied into the log to make the replay happen
exactly as it happened originally.

Log Ordering
============

We log things in order of completion once we are sure the write is no longer in
cache.  This means that normal WRITE requests are not actually logged until the
next REQ_FLUSH request.  This is to make it easier for userspace to replay the
log in a way that correlates to what is on disk and not what is in cache, to
make it easier to detect improper waiting/flushing.

This works by attaching all WRITE requests to a list once the write completes.
Once we see a REQ_FLUSH request we splice this list onto the request and once
the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
simulate the worst case scenario with regard to power failures.  Consider the
following example (W means write, C means complete):

	W1,W2,W3,C3,C2,Wflush,C1,Cflush

The log would show the following:

	W3,W2,flush,W1....

Again this is to simulate what is actually on disk; this allows us to detect
cases where a power failure at a particular point in time would create an
inconsistent file system.

Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
they complete as those requests will obviously bypass the device cache.

Any REQ_DISCARD requests are treated like WRITE requests.  Otherwise we would
have all the DISCARD requests, and then the WRITE requests and then the FLUSH
request.  Consider the following example:

	WRITE block 1, DISCARD block 1, FLUSH

If we logged DISCARD when it completed, the replay would look like this:

	DISCARD 1, WRITE 1, FLUSH

which isn't quite what happened and wouldn't be caught during the log replay.

Target interface
================

i) Constructor

   log-writes <dev_path> <log_dev_path>

   dev_path	: Device that all of the IO will go to normally.
   log_dev_path : Device where the log entries are written to.

ii) Status

   <#logged entries> <highest allocated sector>

   #logged entries	     : Number of logged entries
   highest allocated sector  : Highest allocated sector

iii) Messages

   mark <description>

     You can use a dmsetup message to set an arbitrary mark in a log.
     For example say you want to fsck a file system after every
     write, but first you need to replay up to the mkfs to make sure
     we're fsck'ing something reasonable, you would do something like
     this:

       mkfs.btrfs -f /dev/mapper/log
       dmsetup message log 0 mark mkfs
       <run test>

     This would allow you to replay the log up to the mkfs mark and
     then replay from that point on doing the fsck check in the
     interval that you want.

     Every log has a mark at the end labeled "dm-log-writes-end".

Userspace component
===================

There is a userspace tool that will replay the log for you in various ways.
It can be found here: https://github.com/josefbacik/log-writes

Example usage
=============

Say you want to test fsync on your file system.  You would do something like
this:

TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
dmsetup create log --table "$TABLE"
mkfs.btrfs -f /dev/mapper/log
dmsetup message log 0 mark mkfs

mount /dev/mapper/log /mnt/btrfs-test
<some test that does fsync at the end>
dmsetup message log 0 mark fsync
md5sum /mnt/btrfs-test/foo
umount /mnt/btrfs-test

dmsetup remove log
replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
mount /dev/sdb /mnt/btrfs-test
md5sum /mnt/btrfs-test/foo
<verify md5sum's are correct>

Another option is to do a complicated file system operation and verify the file
system is consistent during the entire operation.  You could do this with:

TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
dmsetup create log --table "$TABLE"
mkfs.btrfs -f /dev/mapper/log
dmsetup message log 0 mark mkfs

mount /dev/mapper/log /mnt/btrfs-test
<fsstress to dirty the fs>
btrfs filesystem balance /mnt/btrfs-test
umount /mnt/btrfs-test
dmsetup remove log

replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
btrfsck /dev/sdb
replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
	--fsck "btrfsck /dev/sdb" --check fua

And that will replay the log until it sees a FUA request, run the fsck command
and if the fsck passes it will replay to the next FUA, until it is completed or
the fsck command exits abnormally.
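As a quick sanity check while a test runs, the status line described above can
be read back with dmsetup (a sketch, using the 'log' device name from the
examples):

	dmsetup status log
	# -> 0 <device size> log-writes <#logged entries> <highest allocated sector>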
@@ -47,8 +47,8 @@ consume far too much memory.
Using this device-mapper switch target we can now build a two-layer
device hierarchy:

-   Upper Tier – Determine which array member the I/O should be sent to.
-   Lower Tier – Load balance amongst paths to a particular member.
+   Upper Tier - Determine which array member the I/O should be sent to.
+   Lower Tier - Load balance amongst paths to a particular member.

The lower tier consists of a single dm multipath device for each member.
Each of these multipath devices contains the set of paths directly to
@@ -380,9 +380,6 @@ then you'll have no access to blocks mapped beyond the end.  If you
load a target that is bigger than before, then extra blocks will be
provisioned as and when needed.

-If you wish to reduce the size of your thin device and potentially
-regain some space then send the 'trim' message to the pool.
-
ii) Status

<nr mapped sectors> <highest mapped sector>
@@ -11,6 +11,7 @@ Construction Parameters
    <data_block_size> <hash_block_size>
    <num_data_blocks> <hash_start_block>
    <algorithm> <digest> <salt>
    [<#opt_params> <opt_params>]

<version>
    This is the type of the on-disk hash format.
@@ -62,6 +63,22 @@ Construction Parameters
<salt>
    The hexadecimal encoding of the salt value.

<#opt_params>
    Number of optional parameters.  If there are no optional parameters,
    the optional parameters section can be skipped or #opt_params can be zero.
    Otherwise #opt_params is the number of following arguments.

    Example of optional parameters section:
        1 ignore_corruption

ignore_corruption
    Log corrupted blocks, but allow read operations to proceed normally.

restart_on_corruption
    Restart the system when a corrupted block is discovered.  This option is
    not compatible with ignore_corruption and requires user space support to
    avoid restart loops.

Theory of operation
===================
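As a sketch, a verity table line ending in the new optional-parameters section
might look like the following (block counts are arbitrary, and <digest>/<salt>
stand for the values produced when the hash tree was formatted):

	0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 \
		<digest> <salt> 1 ignore_corruption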
@@ -125,7 +142,7 @@ block boundary) are the hash blocks which are stored a depth at a time

The full specification of kernel parameters and on-disk metadata format
is available at the cryptsetup project's wiki page
-  http://code.google.com/p/cryptsetup/wiki/DMVerity
+  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity

Status
======
@@ -142,7 +159,7 @@ Set up a device:

A command line tool veritysetup is available to compute or verify
the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository http://code.google.com/p/cryptsetup/
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
(as a libcryptsetup extension).

Create hash on the device:
@@ -196,6 +196,17 @@ config BLK_DEV_DM

	  If unsure, say N.

config DM_MQ_DEFAULT
	bool "request-based DM: use blk-mq I/O path by default"
	depends on BLK_DEV_DM
	---help---
	  This option enables the blk-mq based I/O path for request-based
	  DM devices by default.  With the option the dm_mod.use_blk_mq
	  module/boot option defaults to Y, without it to N, but it can
	  still be overridden either way.

	  If unsure say N.

config DM_DEBUG
	bool "Device mapper debugging support"
	depends on BLK_DEV_DM
@@ -432,4 +443,20 @@ config DM_SWITCH

	  If unsure, say N.

config DM_LOG_WRITES
	tristate "Log writes target support"
	depends on BLK_DEV_DM
	---help---
	  This device-mapper target takes two devices, one device to use
	  normally, one to log all write operations done to the first device.
	  This is for use by file system developers wishing to verify that
	  their fs is writing a consistent file system at all times by allowing
	  them to replay the log in a variety of ways and to check the
	  contents.

	  To compile this code as a module, choose M here: the module will
	  be called dm-log-writes.

	  If unsure, say N.

endif # MD
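Whatever the Kconfig default, the dm_mod.use_blk_mq module/boot option
mentioned in the help text can also be set explicitly, for example:

	# at module load time
	modprobe dm_mod use_blk_mq=Y

	# or, with dm_mod built in, on the kernel command line
	dm_mod.use_blk_mq=Y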
@@ -55,6 +55,7 @@ obj-$(CONFIG_DM_CACHE)		+= dm-cache.o
obj-$(CONFIG_DM_CACHE_MQ)	+= dm-cache-mq.o
obj-$(CONFIG_DM_CACHE_CLEANER)	+= dm-cache-cleaner.o
obj-$(CONFIG_DM_ERA)		+= dm-era.o
obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o

ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs			+= dm-uevent.o
@ -8,6 +8,7 @@
|
||||
#include "dm.h"
|
||||
|
||||
#include <linux/hash.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/slab.h>
|
||||
@ -124,32 +125,41 @@ static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
|
||||
* sorted queue.
|
||||
*/
|
||||
#define NR_QUEUE_LEVELS 16u
|
||||
#define NR_SENTINELS NR_QUEUE_LEVELS * 3
|
||||
|
||||
#define WRITEBACK_PERIOD HZ
|
||||
|
||||
struct queue {
|
||||
unsigned nr_elts;
|
||||
bool current_writeback_sentinels;
|
||||
unsigned long next_writeback;
|
||||
struct list_head qs[NR_QUEUE_LEVELS];
|
||||
struct list_head sentinels[NR_SENTINELS];
|
||||
};
|
||||
|
||||
static void queue_init(struct queue *q)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++)
|
||||
q->nr_elts = 0;
|
||||
q->current_writeback_sentinels = false;
|
||||
q->next_writeback = 0;
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++) {
|
||||
INIT_LIST_HEAD(q->qs + i);
|
||||
INIT_LIST_HEAD(q->sentinels + i);
|
||||
INIT_LIST_HEAD(q->sentinels + NR_QUEUE_LEVELS + i);
|
||||
INIT_LIST_HEAD(q->sentinels + (2 * NR_QUEUE_LEVELS) + i);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned queue_size(struct queue *q)
|
||||
{
|
||||
return q->nr_elts;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks to see if the queue is empty.
|
||||
* FIXME: reduce cpu usage.
|
||||
*/
|
||||
static bool queue_empty(struct queue *q)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++)
|
||||
if (!list_empty(q->qs + i))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return q->nr_elts == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -157,24 +167,19 @@ static bool queue_empty(struct queue *q)
|
||||
*/
|
||||
static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
|
||||
{
|
||||
q->nr_elts++;
|
||||
list_add_tail(elt, q->qs + level);
|
||||
}
|
||||
|
||||
static void queue_remove(struct list_head *elt)
|
||||
static void queue_remove(struct queue *q, struct list_head *elt)
|
||||
{
|
||||
q->nr_elts--;
|
||||
list_del(elt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Shifts all regions down one level. This has no effect on the order of
|
||||
* the queue.
|
||||
*/
|
||||
static void queue_shift_down(struct queue *q)
|
||||
static bool is_sentinel(struct queue *q, struct list_head *h)
|
||||
{
|
||||
unsigned level;
|
||||
|
||||
for (level = 1; level < NR_QUEUE_LEVELS; level++)
|
||||
list_splice_init(q->qs + level, q->qs + level - 1);
|
||||
return (h >= q->sentinels) && (h < (q->sentinels + NR_SENTINELS));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -184,10 +189,12 @@ static void queue_shift_down(struct queue *q)
|
||||
static struct list_head *queue_peek(struct queue *q)
|
||||
{
|
||||
unsigned level;
|
||||
struct list_head *h;
|
||||
|
||||
for (level = 0; level < NR_QUEUE_LEVELS; level++)
|
||||
if (!list_empty(q->qs + level))
|
||||
return q->qs[level].next;
|
||||
list_for_each(h, q->qs + level)
|
||||
if (!is_sentinel(q, h))
|
||||
return h;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -197,16 +204,34 @@ static struct list_head *queue_pop(struct queue *q)
|
||||
struct list_head *r = queue_peek(q);
|
||||
|
||||
if (r) {
|
||||
q->nr_elts--;
|
||||
list_del(r);
|
||||
|
||||
/* have we just emptied the bottom level? */
|
||||
if (list_empty(q->qs))
|
||||
queue_shift_down(q);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pops an entry from a level that is not past a sentinel.
|
||||
*/
|
||||
static struct list_head *queue_pop_old(struct queue *q)
|
||||
{
|
||||
unsigned level;
|
||||
struct list_head *h;
|
||||
|
||||
for (level = 0; level < NR_QUEUE_LEVELS; level++)
|
||||
list_for_each(h, q->qs + level) {
|
||||
if (is_sentinel(q, h))
|
||||
break;
|
||||
|
||||
q->nr_elts--;
|
||||
list_del(h);
|
||||
return h;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct list_head *list_pop(struct list_head *lh)
|
||||
{
|
||||
struct list_head *r = lh->next;
|
||||
@ -217,6 +242,62 @@ static struct list_head *list_pop(struct list_head *lh)
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct list_head *writeback_sentinel(struct queue *q, unsigned level)
|
||||
{
|
||||
if (q->current_writeback_sentinels)
|
||||
return q->sentinels + NR_QUEUE_LEVELS + level;
|
||||
else
|
||||
return q->sentinels + 2 * NR_QUEUE_LEVELS + level;
|
||||
}
|
||||
|
||||
static void queue_update_writeback_sentinels(struct queue *q)
|
||||
{
|
||||
unsigned i;
|
||||
struct list_head *h;
|
||||
|
||||
if (time_after(jiffies, q->next_writeback)) {
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++) {
|
||||
h = writeback_sentinel(q, i);
|
||||
list_del(h);
|
||||
list_add_tail(h, q->qs + i);
|
||||
}
|
||||
|
||||
q->next_writeback = jiffies + WRITEBACK_PERIOD;
|
||||
q->current_writeback_sentinels = !q->current_writeback_sentinels;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Sometimes we want to iterate through entries that have been pushed since
|
||||
* a certain event. We use sentinel entries on the queues to delimit these
|
||||
* 'tick' events.
|
||||
*/
|
||||
static void queue_tick(struct queue *q)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++) {
|
||||
list_del(q->sentinels + i);
|
||||
list_add_tail(q->sentinels + i, q->qs + i);
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*iter_fn)(struct list_head *, void *);
|
||||
static void queue_iterate_tick(struct queue *q, iter_fn fn, void *context)
|
||||
{
|
||||
unsigned i;
|
||||
struct list_head *h;
|
||||
|
||||
for (i = 0; i < NR_QUEUE_LEVELS; i++) {
|
||||
list_for_each_prev(h, q->qs + i) {
|
||||
if (is_sentinel(q, h))
|
||||
break;
|
||||
|
||||
fn(h, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
@ -232,8 +313,6 @@ struct entry {
|
||||
*/
|
||||
bool dirty:1;
|
||||
unsigned hit_count;
|
||||
unsigned generation;
|
||||
unsigned tick;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -481,7 +560,6 @@ static bool in_cache(struct mq_policy *mq, struct entry *e)
|
||||
*/
|
||||
static void push(struct mq_policy *mq, struct entry *e)
|
||||
{
|
||||
e->tick = mq->tick;
|
||||
hash_insert(mq, e);
|
||||
|
||||
if (in_cache(mq, e))
|
||||
@ -496,7 +574,11 @@ static void push(struct mq_policy *mq, struct entry *e)
|
||||
*/
|
||||
static void del(struct mq_policy *mq, struct entry *e)
|
||||
{
|
||||
queue_remove(&e->list);
|
||||
if (in_cache(mq, e))
|
||||
queue_remove(e->dirty ? &mq->cache_dirty : &mq->cache_clean, &e->list);
|
||||
else
|
||||
queue_remove(&mq->pre_cache, &e->list);
|
||||
|
||||
hash_remove(e);
|
||||
}
|
||||
|
||||
@ -518,20 +600,26 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct entry *pop_old(struct mq_policy *mq, struct queue *q)
|
||||
{
|
||||
struct entry *e;
|
||||
struct list_head *h = queue_pop_old(q);
|
||||
|
||||
if (!h)
|
||||
return NULL;
|
||||
|
||||
e = container_of(h, struct entry, list);
|
||||
hash_remove(e);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct entry *peek(struct queue *q)
|
||||
{
|
||||
struct list_head *h = queue_peek(q);
|
||||
return h ? container_of(h, struct entry, list) : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Has this entry already been updated?
|
||||
*/
|
||||
static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
|
||||
{
|
||||
return mq->tick == e->tick;
|
||||
}
|
||||
|
||||
/*
|
||||
* The promotion threshold is adjusted every generation. As are the counts
|
||||
* of the entries.
|
||||
@ -583,20 +671,9 @@ static void check_generation(struct mq_policy *mq)
|
||||
* Whenever we use an entry we bump up it's hit counter, and push it to the
|
||||
* back to it's current level.
|
||||
*/
|
||||
static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
|
||||
static void requeue(struct mq_policy *mq, struct entry *e)
|
||||
{
|
||||
if (updated_this_tick(mq, e))
|
||||
return;
|
||||
|
||||
e->hit_count++;
|
||||
mq->hit_count++;
|
||||
check_generation(mq);
|
||||
|
||||
/* generation adjustment, to stop the counts increasing forever. */
|
||||
/* FIXME: divide? */
|
||||
/* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
|
||||
e->generation = mq->generation;
|
||||
|
||||
del(mq, e);
|
||||
push(mq, e);
|
||||
}
|
||||
@ -703,7 +780,7 @@ static int cache_entry_found(struct mq_policy *mq,
|
||||
struct entry *e,
|
||||
struct policy_result *result)
|
||||
{
|
||||
requeue_and_update_tick(mq, e);
|
||||
requeue(mq, e);
|
||||
|
||||
if (in_cache(mq, e)) {
|
||||
result->op = POLICY_HIT;
|
||||
@ -740,8 +817,6 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
|
||||
new_e->oblock = e->oblock;
|
||||
new_e->dirty = false;
|
||||
new_e->hit_count = e->hit_count;
|
||||
new_e->generation = e->generation;
|
||||
new_e->tick = e->tick;
|
||||
|
||||
del(mq, e);
|
||||
free_entry(&mq->pre_cache_pool, e);
|
||||
@ -757,18 +832,16 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
|
||||
int data_dir, struct policy_result *result)
|
||||
{
|
||||
int r = 0;
|
||||
bool updated = updated_this_tick(mq, e);
|
||||
|
||||
if ((!discarded_oblock && updated) ||
|
||||
!should_promote(mq, e, discarded_oblock, data_dir)) {
|
||||
requeue_and_update_tick(mq, e);
|
||||
if (!should_promote(mq, e, discarded_oblock, data_dir)) {
|
||||
requeue(mq, e);
|
||||
result->op = POLICY_MISS;
|
||||
|
||||
} else if (!can_migrate)
|
||||
r = -EWOULDBLOCK;
|
||||
|
||||
else {
|
||||
requeue_and_update_tick(mq, e);
|
||||
requeue(mq, e);
|
||||
r = pre_cache_to_cache(mq, e, result);
|
||||
}
|
||||
|
||||
@ -795,7 +868,6 @@ static void insert_in_pre_cache(struct mq_policy *mq,
|
||||
e->dirty = false;
|
||||
e->oblock = oblock;
|
||||
e->hit_count = 1;
|
||||
e->generation = mq->generation;
|
||||
push(mq, e);
|
||||
}
|
||||
|
||||
@ -828,7 +900,6 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
|
||||
e->oblock = oblock;
|
||||
e->dirty = false;
|
||||
e->hit_count = 1;
|
||||
e->generation = mq->generation;
|
||||
push(mq, e);
|
||||
|
||||
result->cblock = infer_cblock(&mq->cache_pool, e);
|
||||
@ -905,12 +976,37 @@ static void mq_destroy(struct dm_cache_policy *p)
|
||||
kfree(mq);
|
||||
}
|
||||
|
||||
static void update_pre_cache_hits(struct list_head *h, void *context)
|
||||
{
|
||||
struct entry *e = container_of(h, struct entry, list);
|
||||
e->hit_count++;
|
||||
}
|
||||
|
||||
static void update_cache_hits(struct list_head *h, void *context)
|
||||
{
|
||||
struct mq_policy *mq = context;
|
||||
struct entry *e = container_of(h, struct entry, list);
|
||||
e->hit_count++;
|
||||
mq->hit_count++;
|
||||
}
|
||||
|
||||
static void copy_tick(struct mq_policy *mq)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long flags, tick;
|
||||
|
||||
spin_lock_irqsave(&mq->tick_lock, flags);
|
||||
mq->tick = mq->tick_protected;
|
||||
tick = mq->tick_protected;
|
||||
if (tick != mq->tick) {
|
||||
queue_iterate_tick(&mq->pre_cache, update_pre_cache_hits, mq);
|
||||
queue_iterate_tick(&mq->cache_dirty, update_cache_hits, mq);
|
||||
queue_iterate_tick(&mq->cache_clean, update_cache_hits, mq);
|
||||
mq->tick = tick;
|
||||
}
|
||||
|
||||
queue_tick(&mq->pre_cache);
|
||||
queue_tick(&mq->cache_dirty);
|
||||
queue_tick(&mq->cache_clean);
|
||||
queue_update_writeback_sentinels(&mq->cache_dirty);
|
||||
spin_unlock_irqrestore(&mq->tick_lock, flags);
|
||||
}
|
||||
|
||||
@ -1001,7 +1097,6 @@ static int mq_load_mapping(struct dm_cache_policy *p,
|
||||
e->oblock = oblock;
|
||||
e->dirty = false; /* this gets corrected in a minute */
|
||||
e->hit_count = hint_valid ? hint : 1;
|
||||
e->generation = mq->generation;
|
||||
push(mq, e);
|
||||
|
||||
return 0;
|
||||
@ -1012,10 +1107,15 @@ static int mq_save_hints(struct mq_policy *mq, struct queue *q,
|
||||
{
|
||||
int r;
|
||||
unsigned level;
|
||||
struct list_head *h;
|
||||
struct entry *e;
|
||||
|
||||
for (level = 0; level < NR_QUEUE_LEVELS; level++)
|
||||
list_for_each_entry(e, q->qs + level, list) {
|
||||
list_for_each(h, q->qs + level) {
|
||||
if (is_sentinel(q, h))
|
||||
continue;
|
||||
|
||||
e = container_of(h, struct entry, list);
|
||||
r = fn(context, infer_cblock(&mq->cache_pool, e),
|
||||
e->oblock, e->hit_count);
|
||||
if (r)
|
||||
@ -1087,10 +1187,27 @@ static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
|
||||
return r;
|
||||
}
|
||||
|
||||
#define CLEAN_TARGET_PERCENTAGE 25
|
||||
|
||||
static bool clean_target_met(struct mq_policy *mq)
|
||||
{
|
||||
/*
|
||||
* Cache entries may not be populated. So we're cannot rely on the
|
||||
* size of the clean queue.
|
||||
*/
|
||||
unsigned nr_clean = from_cblock(mq->cache_size) - queue_size(&mq->cache_dirty);
|
||||
unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_PERCENTAGE / 100;
|
||||
|
||||
return nr_clean >= target;
|
||||
}
|
||||
|
||||
static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
|
||||
dm_cblock_t *cblock)
|
||||
{
|
||||
struct entry *e = pop(mq, &mq->cache_dirty);
|
||||
struct entry *e = pop_old(mq, &mq->cache_dirty);
|
||||
|
||||
if (!e && !clean_target_met(mq))
|
||||
e = pop(mq, &mq->cache_dirty);
|
||||
|
||||
if (!e)
|
||||
return -ENODATA;
|
||||
|
@ -228,7 +228,7 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
|
||||
*
|
||||
* tcw: Compatible implementation of the block chaining mode used
|
||||
* by the TrueCrypt device encryption system (prior to version 4.1).
|
||||
* For more info see: http://www.truecrypt.org
|
||||
* For more info see: https://gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat
|
||||
* It operates on full 512 byte sectors and uses CBC
|
||||
* with an IV derived from initial key and the sector number.
|
||||
* In addition, whitening value is applied on every sector, whitening
|
||||
@ -925,11 +925,10 @@ static int crypt_convert(struct crypt_config *cc,
|
||||
|
||||
switch (r) {
|
||||
/* async */
|
||||
case -EINPROGRESS:
|
||||
case -EBUSY:
|
||||
wait_for_completion(&ctx->restart);
|
||||
reinit_completion(&ctx->restart);
|
||||
/* fall through*/
|
||||
case -EINPROGRESS:
|
||||
ctx->req = NULL;
|
||||
ctx->cc_sector++;
|
||||
continue;
|
||||
@ -1124,15 +1123,15 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
|
||||
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
|
||||
{
|
||||
struct crypt_config *cc = io->cc;
|
||||
struct bio *base_bio = io->base_bio;
|
||||
struct bio *clone;
|
||||
|
||||
/*
|
||||
* The block layer might modify the bvec array, so always
|
||||
* copy the required bvecs because we need the original
|
||||
* one in order to decrypt the whole bio data *afterwards*.
|
||||
* We need the original biovec array in order to decrypt
|
||||
* the whole bio data *afterwards* -- thanks to immutable
|
||||
* biovecs we don't need to worry about the block layer
|
||||
* modifying the biovec array; so leverage bio_clone_fast().
|
||||
*/
|
||||
clone = bio_clone_bioset(base_bio, gfp, cc->bs);
|
||||
clone = bio_clone_fast(io->base_bio, gfp, cc->bs);
|
||||
if (!clone)
|
||||
return 1;
|
||||
|
||||
@ -1346,10 +1345,8 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
|
||||
struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
|
||||
struct crypt_config *cc = io->cc;
|
||||
|
||||
if (error == -EINPROGRESS) {
|
||||
complete(&ctx->restart);
|
||||
if (error == -EINPROGRESS)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
|
||||
error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
|
||||
@ -1360,12 +1357,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
|
||||
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
|
||||
|
||||
if (!atomic_dec_and_test(&ctx->cc_pending))
|
||||
return;
|
||||
goto done;
|
||||
|
||||
if (bio_data_dir(io->base_bio) == READ)
|
||||
kcryptd_crypt_read_done(io);
|
||||
else
|
||||
kcryptd_crypt_write_io_submit(io, 1);
|
||||
done:
|
||||
if (!completion_done(&ctx->restart))
|
||||
complete(&ctx->restart);
|
||||
}
|
||||
|
||||
static void kcryptd_crypt(struct work_struct *work)
|
||||
@ -1816,6 +1816,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
if (ret)
|
||||
goto bad;
|
||||
|
||||
ret = -EINVAL;
|
||||
while (opt_params--) {
|
||||
opt_string = dm_shift_arg(&as);
|
||||
if (!opt_string) {
|
||||
|
@ -236,7 +236,7 @@ static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
|
||||
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
|
||||
|
||||
delayed->context = dc;
|
||||
delayed->expires = expires = jiffies + (delay * HZ / 1000);
|
||||
delayed->expires = expires = jiffies + msecs_to_jiffies(delay);
|
||||
|
||||
mutex_lock(&delayed_bios_lock);
|
||||
|
||||
|
@ -17,7 +17,9 @@
|
||||
|
||||
#define DM_LOG_USERSPACE_VSN "1.3.0"
|
||||
|
||||
struct flush_entry {
|
||||
#define FLUSH_ENTRY_POOL_SIZE 16
|
||||
|
||||
struct dm_dirty_log_flush_entry {
|
||||
int type;
|
||||
region_t region;
|
||||
struct list_head list;
|
||||
@ -34,22 +36,14 @@ struct flush_entry {
|
||||
struct log_c {
|
||||
struct dm_target *ti;
|
||||
struct dm_dev *log_dev;
|
||||
uint32_t region_size;
|
||||
region_t region_count;
|
||||
uint64_t luid;
|
||||
char uuid[DM_UUID_LEN];
|
||||
|
||||
char *usr_argv_str;
|
||||
uint32_t usr_argc;
|
||||
|
||||
/*
|
||||
* in_sync_hint gets set when doing is_remote_recovering. It
|
||||
* represents the first region that needs recovery. IOW, the
|
||||
* first zero bit of sync_bits. This can be useful for to limit
|
||||
* traffic for calls like is_remote_recovering and get_resync_work,
|
||||
* but be take care in its use for anything else.
|
||||
*/
|
||||
uint64_t in_sync_hint;
|
||||
uint32_t region_size;
|
||||
region_t region_count;
|
||||
uint64_t luid;
|
||||
char uuid[DM_UUID_LEN];
|
||||
|
||||
/*
|
||||
* Mark and clear requests are held until a flush is issued
|
||||
@ -61,6 +55,15 @@ struct log_c {
|
||||
struct list_head mark_list;
|
||||
struct list_head clear_list;
|
||||
|
||||
/*
|
||||
* in_sync_hint gets set when doing is_remote_recovering. It
|
||||
* represents the first region that needs recovery. IOW, the
|
||||
* first zero bit of sync_bits. This can be useful for to limit
|
||||
* traffic for calls like is_remote_recovering and get_resync_work,
|
||||
* but be take care in its use for anything else.
|
||||
*/
|
||||
uint64_t in_sync_hint;
|
||||
|
||||
/*
|
||||
* Workqueue for flush of clear region requests.
|
||||
*/
|
||||
@ -72,19 +75,11 @@ struct log_c {
|
||||
* Combine userspace flush and mark requests for efficiency.
|
||||
*/
|
||||
uint32_t integrated_flush;
|
||||
|
||||
mempool_t *flush_entry_pool;
|
||||
};
|
||||
|
||||
static mempool_t *flush_entry_pool;
|
||||
|
||||
static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
|
||||
{
|
||||
return kmalloc(sizeof(struct flush_entry), gfp_mask);
|
||||
}
|
||||
|
||||
static void flush_entry_free(void *element, void *pool_data)
|
||||
{
|
||||
kfree(element);
|
||||
}
|
||||
static struct kmem_cache *_flush_entry_cache;
|
||||
|
||||
static int userspace_do_request(struct log_c *lc, const char *uuid,
|
||||
int request_type, char *data, size_t data_size,
|
||||
@ -254,6 +249,14 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
|
||||
goto out;
|
||||
}
|
||||
|
||||
lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE,
|
||||
_flush_entry_cache);
|
||||
if (!lc->flush_entry_pool) {
|
||||
DMERR("Failed to create flush_entry_pool");
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send table string and get back any opened device.
|
||||
*/
|
||||
@ -310,6 +313,8 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
|
||||
out:
|
||||
kfree(devices_rdata);
|
||||
if (r) {
|
||||
if (lc->flush_entry_pool)
|
||||
mempool_destroy(lc->flush_entry_pool);
|
||||
kfree(lc);
|
||||
kfree(ctr_str);
|
||||
} else {
|
||||
@ -338,6 +343,8 @@ static void userspace_dtr(struct dm_dirty_log *log)
|
||||
if (lc->log_dev)
|
||||
dm_put_device(lc->ti, lc->log_dev);
|
||||
|
||||
mempool_destroy(lc->flush_entry_pool);
|
||||
|
||||
kfree(lc->usr_argv_str);
|
||||
kfree(lc);
|
||||
|
||||
@ -461,7 +468,7 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
|
||||
static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
|
||||
{
|
||||
int r = 0;
|
||||
struct flush_entry *fe;
|
||||
struct dm_dirty_log_flush_entry *fe;
|
||||
|
||||
list_for_each_entry(fe, flush_list, list) {
|
||||
r = userspace_do_request(lc, lc->uuid, fe->type,
|
||||
@ -481,7 +488,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
|
||||
int r = 0;
|
||||
int count;
|
||||
uint32_t type = 0;
|
||||
struct flush_entry *fe, *tmp_fe;
|
||||
struct dm_dirty_log_flush_entry *fe, *tmp_fe;
|
||||
LIST_HEAD(tmp_list);
|
||||
uint64_t group[MAX_FLUSH_GROUP_COUNT];
|
||||
|
||||
@ -563,7 +570,8 @@ static int userspace_flush(struct dm_dirty_log *log)
|
||||
LIST_HEAD(clear_list);
|
||||
int mark_list_is_empty;
|
||||
int clear_list_is_empty;
|
||||
struct flush_entry *fe, *tmp_fe;
|
||||
struct dm_dirty_log_flush_entry *fe, *tmp_fe;
|
||||
mempool_t *flush_entry_pool = lc->flush_entry_pool;
|
||||
|
||||
spin_lock_irqsave(&lc->flush_lock, flags);
|
||||
list_splice_init(&lc->mark_list, &mark_list);
|
||||
@ -643,10 +651,10 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct log_c *lc = log->context;
|
||||
struct flush_entry *fe;
|
||||
struct dm_dirty_log_flush_entry *fe;
|
||||
|
||||
/* Wait for an allocation, but _never_ fail */
|
||||
fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
|
||||
fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO);
|
||||
BUG_ON(!fe);
|
||||
|
||||
spin_lock_irqsave(&lc->flush_lock, flags);
|
||||
@ -672,7 +680,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct log_c *lc = log->context;
|
||||
struct flush_entry *fe;
|
||||
struct dm_dirty_log_flush_entry *fe;
|
||||
|
||||
/*
|
||||
* If we fail to allocate, we skip the clearing of
|
||||
@ -680,7 +688,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
|
||||
* to cause the region to be resync'ed when the
|
||||
* device is activated next time.
|
||||
*/
|
||||
fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
|
||||
fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC);
|
||||
if (!fe) {
|
||||
DMERR("Failed to allocate memory to clear region.");
|
||||
return;
|
||||
@ -733,7 +741,6 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
|
||||
static void userspace_set_region_sync(struct dm_dirty_log *log,
|
||||
region_t region, int in_sync)
|
||||
{
|
||||
int r;
|
||||
struct log_c *lc = log->context;
|
||||
struct {
|
||||
region_t r;
|
||||
@ -743,12 +750,12 @@ static void userspace_set_region_sync(struct dm_dirty_log *log,
|
||||
pkg.r = region;
|
||||
pkg.i = (int64_t)in_sync;
|
||||
|
||||
r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
|
||||
(char *)&pkg, sizeof(pkg), NULL, NULL);
|
||||
(void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
|
||||
(char *)&pkg, sizeof(pkg), NULL, NULL);
|
||||
|
||||
/*
|
||||
* It would be nice to be able to report failures.
|
||||
* However, it is easy emough to detect and resolve.
|
||||
* However, it is easy enough to detect and resolve.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
@ -886,18 +893,16 @@ static int __init userspace_dirty_log_init(void)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
flush_entry_pool = mempool_create(100, flush_entry_alloc,
|
||||
flush_entry_free, NULL);
|
||||
|
||||
if (!flush_entry_pool) {
|
||||
DMWARN("Unable to create flush_entry_pool: No memory.");
|
||||
_flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0);
|
||||
if (!_flush_entry_cache) {
|
||||
DMWARN("Unable to create flush_entry_cache: No memory.");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = dm_ulog_tfr_init();
|
||||
if (r) {
|
||||
DMWARN("Unable to initialize userspace log communications");
|
||||
mempool_destroy(flush_entry_pool);
|
||||
kmem_cache_destroy(_flush_entry_cache);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -905,7 +910,7 @@ static int __init userspace_dirty_log_init(void)
|
||||
if (r) {
|
||||
DMWARN("Couldn't register userspace dirty log type");
|
||||
dm_ulog_tfr_exit();
|
||||
mempool_destroy(flush_entry_pool);
|
||||
kmem_cache_destroy(_flush_entry_cache);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -917,7 +922,7 @@ static void __exit userspace_dirty_log_exit(void)
|
||||
{
|
||||
dm_dirty_log_type_unregister(&_userspace_type);
|
||||
dm_ulog_tfr_exit();
|
||||
mempool_destroy(flush_entry_pool);
|
||||
kmem_cache_destroy(_flush_entry_cache);
|
||||
|
||||
DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
|
||||
return;
|
||||
|
@ -172,6 +172,7 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
|
||||
char *rdata, size_t *rdata_size)
|
||||
{
|
||||
int r = 0;
|
||||
unsigned long tmo;
|
||||
size_t dummy = 0;
|
||||
int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg);
|
||||
struct dm_ulog_request *tfr = prealloced_ulog_tfr;
|
||||
@ -236,11 +237,11 @@ resend:
|
||||
goto out;
|
||||
}
|
||||
|
||||
r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
|
||||
tmo = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
|
||||
spin_lock(&receiving_list_lock);
|
||||
list_del_init(&(pkg.list));
|
||||
spin_unlock(&receiving_list_lock);
|
||||
if (!r) {
|
||||
if (!tmo) {
|
||||
DMWARN("[%s] Request timed out: [%u/%u] - retrying",
|
||||
(strlen(uuid) > 8) ?
|
||||
(uuid + (strlen(uuid) - 8)) : (uuid),
|
||||
|
drivers/md/dm-log-writes.c (new file, 825 lines)
@@ -0,0 +1,825 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Facebook. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include <linux/device-mapper.h>
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
|
||||
#define DM_MSG_PREFIX "log-writes"
|
||||
|
||||
/*
|
||||
* This target will sequentially log all writes to the target device onto the
|
||||
* log device. This is helpful for replaying writes to check for fs consistency
|
||||
* at all times. This target provides a mechanism to mark specific events to
|
||||
* check data at a later time. So for example you would:
|
||||
*
|
||||
* write data
|
||||
* fsync
|
||||
* dmsetup message /dev/whatever mark mymark
|
||||
* unmount /mnt/test
|
||||
*
|
||||
* Then replay the log up to mymark and check the contents of the replay to
|
||||
* verify it matches what was written.
|
||||
*
|
||||
* We log writes only after they have been flushed, this makes the log describe
|
||||
* close to the order in which the data hits the actual disk, not its cache. So
|
||||
* for example the following sequence (W means write, C means complete)
|
||||
*
|
||||
* Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd
|
||||
*
|
||||
* Would result in the log looking like this:
|
||||
*
|
||||
* c,a,flush,fuad,b,<other writes>,<next flush>
|
||||
*
|
||||
* This is meant to help expose problems where file systems do not properly wait
|
||||
* on data being written before invoking a FLUSH. FUA bypasses cache so once it
|
||||
* completes it is added to the log as it should be on disk.
|
||||
*
|
||||
* We treat DISCARDs as if they don't bypass cache so that they are logged in
|
||||
* order of completion along with the normal writes. If we didn't do it this
|
||||
* way we would process all the discards first and then write all the data, when
|
||||
* in fact we want to do the data and the discard in the order that they
|
||||
* completed.
|
||||
*/
|
||||
#define LOG_FLUSH_FLAG (1 << 0)
|
||||
#define LOG_FUA_FLAG (1 << 1)
|
||||
#define LOG_DISCARD_FLAG (1 << 2)
|
||||
#define LOG_MARK_FLAG (1 << 3)
|
||||
|
||||
#define WRITE_LOG_VERSION 1
|
||||
#define WRITE_LOG_MAGIC 0x6a736677736872
|
||||
|
||||
/*
|
||||
* The disk format for this is braindead simple.
|
||||
*
|
||||
* At byte 0 we have our super, followed by the following sequence for
|
||||
* nr_entries:
|
||||
*
|
||||
* [ 1 sector ][ entry->nr_sectors ]
|
||||
* [log_write_entry][ data written ]
|
||||
*
|
||||
* The log_write_entry takes up a full sector so we can have arbitrary length
|
||||
* marks and it leaves us room for extra content in the future.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Basic info about the log for userspace.
|
||||
*/
|
||||
struct log_write_super {
|
||||
__le64 magic;
|
||||
__le64 version;
|
||||
__le64 nr_entries;
|
||||
__le32 sectorsize;
|
||||
};
|
||||
|
||||
/*
|
||||
* sector - the sector we wrote.
|
||||
* nr_sectors - the number of sectors we wrote.
|
||||
* flags - flags for this log entry.
|
||||
* data_len - the size of the data in this log entry, this is for private log
|
||||
* entry stuff, the MARK data provided by userspace for example.
|
||||
*/
|
||||
struct log_write_entry {
|
||||
__le64 sector;
|
||||
__le64 nr_sectors;
|
||||
__le64 flags;
|
||||
__le64 data_len;
|
||||
};
|
||||
|
||||
struct log_writes_c {
|
||||
struct dm_dev *dev;
|
||||
struct dm_dev *logdev;
|
||||
u64 logged_entries;
|
||||
u32 sectorsize;
|
||||
atomic_t io_blocks;
|
||||
atomic_t pending_blocks;
|
||||
sector_t next_sector;
|
||||
sector_t end_sector;
|
||||
bool logging_enabled;
|
||||
bool device_supports_discard;
|
||||
spinlock_t blocks_lock;
|
||||
struct list_head unflushed_blocks;
|
||||
struct list_head logging_blocks;
|
||||
wait_queue_head_t wait;
|
||||
struct task_struct *log_kthread;
|
||||
};
|
||||
|
||||
struct pending_block {
|
||||
int vec_cnt;
|
||||
u64 flags;
|
||||
sector_t sector;
|
||||
sector_t nr_sectors;
|
||||
char *data;
|
||||
u32 datalen;
|
||||
struct list_head list;
|
||||
struct bio_vec vecs[0];
|
||||
};
|
||||
|
||||
struct per_bio_data {
|
||||
struct pending_block *block;
|
||||
};
|
||||
|
||||
static void put_pending_block(struct log_writes_c *lc)
|
||||
{
|
||||
if (atomic_dec_and_test(&lc->pending_blocks)) {
|
||||
smp_mb__after_atomic();
|
||||
if (waitqueue_active(&lc->wait))
|
||||
wake_up(&lc->wait);
|
||||
}
|
||||
}
|
||||
|
||||
static void put_io_block(struct log_writes_c *lc)
|
||||
{
|
||||
if (atomic_dec_and_test(&lc->io_blocks)) {
|
||||
smp_mb__after_atomic();
|
||||
if (waitqueue_active(&lc->wait))
|
||||
wake_up(&lc->wait);
|
||||
}
|
||||
}
|
||||
|
||||
static void log_end_io(struct bio *bio, int err)
|
||||
{
|
||||
struct log_writes_c *lc = bio->bi_private;
|
||||
struct bio_vec *bvec;
|
||||
int i;
|
||||
|
||||
if (err) {
|
||||
unsigned long flags;
|
||||
|
||||
DMERR("Error writing log block, error=%d", err);
|
||||
spin_lock_irqsave(&lc->blocks_lock, flags);
|
||||
lc->logging_enabled = false;
|
||||
spin_unlock_irqrestore(&lc->blocks_lock, flags);
|
||||
}
|
||||
|
||||
bio_for_each_segment_all(bvec, bio, i)
|
||||
__free_page(bvec->bv_page);
|
||||
|
||||
put_io_block(lc);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* Meant to be called if there is an error, it will free all the pages
|
||||
* associated with the block.
|
||||
*/
|
||||
static void free_pending_block(struct log_writes_c *lc,
|
||||
struct pending_block *block)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < block->vec_cnt; i++) {
|
||||
if (block->vecs[i].bv_page)
|
||||
__free_page(block->vecs[i].bv_page);
|
||||
}
|
||||
kfree(block->data);
|
||||
kfree(block);
|
||||
put_pending_block(lc);
|
||||
}
|
||||
|
||||
static int write_metadata(struct log_writes_c *lc, void *entry,
|
||||
size_t entrylen, void *data, size_t datalen,
|
||||
sector_t sector)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct page *page;
|
||||
void *ptr;
|
||||
size_t ret;
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, 1);
|
||||
if (!bio) {
|
||||
DMERR("Couldn't alloc log bio");
|
||||
goto error;
|
||||
}
|
||||
bio->bi_iter.bi_size = 0;
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio->bi_bdev = lc->logdev->bdev;
|
||||
bio->bi_end_io = log_end_io;
|
||||
bio->bi_private = lc;
|
||||
set_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
DMERR("Couldn't alloc log page");
|
||||
bio_put(bio);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ptr = kmap_atomic(page);
|
||||
memcpy(ptr, entry, entrylen);
|
||||
if (datalen)
|
||||
memcpy(ptr + entrylen, data, datalen);
|
||||
memset(ptr + entrylen + datalen, 0,
|
||||
lc->sectorsize - entrylen - datalen);
|
||||
kunmap_atomic(ptr);
|
||||
|
||||
ret = bio_add_page(bio, page, lc->sectorsize, 0);
|
||||
if (ret != lc->sectorsize) {
|
||||
DMERR("Couldn't add page to the log block");
|
||||
goto error_bio;
|
||||
}
|
||||
submit_bio(WRITE, bio);
|
||||
return 0;
|
||||
error_bio:
|
||||
bio_put(bio);
|
||||
__free_page(page);
|
||||
error:
|
||||
put_io_block(lc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int log_one_block(struct log_writes_c *lc,
|
||||
struct pending_block *block, sector_t sector)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct log_write_entry entry;
|
||||
size_t ret;
|
||||
int i;
|
||||
|
||||
entry.sector = cpu_to_le64(block->sector);
|
||||
entry.nr_sectors = cpu_to_le64(block->nr_sectors);
|
||||
entry.flags = cpu_to_le64(block->flags);
|
||||
entry.data_len = cpu_to_le64(block->datalen);
|
||||
if (write_metadata(lc, &entry, sizeof(entry), block->data,
|
||||
block->datalen, sector)) {
|
||||
free_pending_block(lc, block);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!block->vec_cnt)
|
||||
goto out;
|
||||
sector++;
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
|
||||
if (!bio) {
|
||||
DMERR("Couldn't alloc log bio");
|
||||
goto error;
|
||||
}
|
||||
atomic_inc(&lc->io_blocks);
|
||||
bio->bi_iter.bi_size = 0;
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio->bi_bdev = lc->logdev->bdev;
|
||||
bio->bi_end_io = log_end_io;
|
||||
bio->bi_private = lc;
|
||||
set_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
|
||||
for (i = 0; i < block->vec_cnt; i++) {
|
||||
/*
|
||||
* The page offset is always 0 because we allocate a new page
|
||||
* for every bvec in the original bio for simplicity sake.
|
||||
*/
|
||||
ret = bio_add_page(bio, block->vecs[i].bv_page,
|
||||
block->vecs[i].bv_len, 0);
|
||||
if (ret != block->vecs[i].bv_len) {
|
||||
atomic_inc(&lc->io_blocks);
|
||||
submit_bio(WRITE, bio);
|
||||
bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i);
|
||||
if (!bio) {
|
||||
DMERR("Couldn't alloc log bio");
|
||||
goto error;
|
||||
}
|
||||
bio->bi_iter.bi_size = 0;
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
bio->bi_bdev = lc->logdev->bdev;
|
||||
bio->bi_end_io = log_end_io;
|
||||
bio->bi_private = lc;
|
||||
set_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
|
||||
ret = bio_add_page(bio, block->vecs[i].bv_page,
|
||||
block->vecs[i].bv_len, 0);
|
||||
if (ret != block->vecs[i].bv_len) {
|
||||
DMERR("Couldn't add page on new bio?");
|
||||
bio_put(bio);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
sector += block->vecs[i].bv_len >> SECTOR_SHIFT;
|
||||
}
|
||||
submit_bio(WRITE, bio);
|
||||
out:
|
||||
kfree(block->data);
|
||||
kfree(block);
|
||||
put_pending_block(lc);
|
||||
return 0;
|
||||
error:
|
||||
free_pending_block(lc, block);
|
||||
put_io_block(lc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int log_super(struct log_writes_c *lc)
|
||||
{
|
||||
struct log_write_super super;
|
||||
|
||||
super.magic = cpu_to_le64(WRITE_LOG_MAGIC);
|
||||
super.version = cpu_to_le64(WRITE_LOG_VERSION);
|
||||
super.nr_entries = cpu_to_le64(lc->logged_entries);
|
||||
super.sectorsize = cpu_to_le32(lc->sectorsize);
|
||||
|
||||
if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
|
||||
DMERR("Couldn't write super");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline sector_t logdev_last_sector(struct log_writes_c *lc)
|
||||
{
|
||||
return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static int log_writes_kthread(void *arg)
|
||||
{
|
||||
struct log_writes_c *lc = (struct log_writes_c *)arg;
|
||||
sector_t sector = 0;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
bool super = false;
|
||||
bool logging_enabled;
|
||||
struct pending_block *block = NULL;
|
||||
int ret;
|
||||
|
||||
spin_lock_irq(&lc->blocks_lock);
|
||||
if (!list_empty(&lc->logging_blocks)) {
|
||||
block = list_first_entry(&lc->logging_blocks,
|
||||
struct pending_block, list);
|
||||
list_del_init(&block->list);
|
||||
if (!lc->logging_enabled)
|
||||
goto next;
|
||||
|
||||
sector = lc->next_sector;
|
||||
if (block->flags & LOG_DISCARD_FLAG)
|
||||
lc->next_sector++;
|
||||
else
|
||||
lc->next_sector += block->nr_sectors + 1;
|
||||
|
||||
/*
|
||||
* Apparently the size of the device may not be known
|
||||
* right away, so handle this properly.
|
||||
*/
|
||||
if (!lc->end_sector)
|
||||
lc->end_sector = logdev_last_sector(lc);
|
||||
if (lc->end_sector &&
|
||||
lc->next_sector >= lc->end_sector) {
|
||||
DMERR("Ran out of space on the logdev");
|
||||
lc->logging_enabled = false;
|
||||
goto next;
|
||||
}
|
||||
lc->logged_entries++;
|
||||
atomic_inc(&lc->io_blocks);
|
||||
|
||||
super = (block->flags & (LOG_FUA_FLAG | LOG_MARK_FLAG));
|
||||
if (super)
|
||||
atomic_inc(&lc->io_blocks);
|
||||
}
|
||||
next:
|
||||
logging_enabled = lc->logging_enabled;
|
||||
spin_unlock_irq(&lc->blocks_lock);
|
||||
if (block) {
|
||||
if (logging_enabled) {
|
||||
ret = log_one_block(lc, block, sector);
|
||||
if (!ret && super)
|
||||
ret = log_super(lc);
|
||||
if (ret) {
|
||||
spin_lock_irq(&lc->blocks_lock);
|
||||
lc->logging_enabled = false;
|
||||
spin_unlock_irq(&lc->blocks_lock);
|
||||
}
|
||||
} else
|
||||
free_pending_block(lc, block);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!try_to_freeze()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (!kthread_should_stop() &&
|
||||
!atomic_read(&lc->pending_blocks))
|
||||
schedule();
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}

/*
 * Construct a log-writes mapping:
 * log-writes <dev_path> <log_dev_path>
 */
static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        struct log_writes_c *lc;
        struct dm_arg_set as;
        const char *devname, *logdevname;

        as.argc = argc;
        as.argv = argv;

        if (argc < 2) {
                ti->error = "Invalid argument count";
                return -EINVAL;
        }

        lc = kzalloc(sizeof(struct log_writes_c), GFP_KERNEL);
        if (!lc) {
                ti->error = "Cannot allocate context";
                return -ENOMEM;
        }
        spin_lock_init(&lc->blocks_lock);
        INIT_LIST_HEAD(&lc->unflushed_blocks);
        INIT_LIST_HEAD(&lc->logging_blocks);
        init_waitqueue_head(&lc->wait);
        lc->sectorsize = 1 << SECTOR_SHIFT;
        atomic_set(&lc->io_blocks, 0);
        atomic_set(&lc->pending_blocks, 0);

        devname = dm_shift_arg(&as);
        if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev)) {
                ti->error = "Device lookup failed";
                goto bad;
        }

        logdevname = dm_shift_arg(&as);
        if (dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), &lc->logdev)) {
                ti->error = "Log device lookup failed";
                dm_put_device(ti, lc->dev);
                goto bad;
        }

        lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
        if (!lc->log_kthread) {
                ti->error = "Couldn't alloc kthread";
                dm_put_device(ti, lc->dev);
                dm_put_device(ti, lc->logdev);
                goto bad;
        }

        /* We put the super at sector 0, start logging at sector 1 */
        lc->next_sector = 1;
        lc->logging_enabled = true;
        lc->end_sector = logdev_last_sector(lc);
        lc->device_supports_discard = true;

        ti->num_flush_bios = 1;
        ti->flush_supported = true;
        ti->num_discard_bios = 1;
        ti->discards_supported = true;
        ti->per_bio_data_size = sizeof(struct per_bio_data);
        ti->private = lc;
        return 0;

bad:
        kfree(lc);
        return -EINVAL;
}
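Given the constructor's "<dev_path> <log_dev_path>" table format, a minimal userspace sketch of loading such a mapping through libdevmapper might look as follows. The mapping name, device paths and size are made-up values and error handling is trimmed; this is an illustration of the table line, not part of the patch.

#include <libdevmapper.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct dm_task *dmt = dm_task_create(DM_DEVICE_CREATE);
        uint64_t nr_sectors = 2097152;  /* hypothetical 1 GiB data device, in 512-byte sectors */

        if (!dmt)
                return 1;
        dm_task_set_name(dmt, "logwrites-test");
        /* table line: <start> <num_sectors> log-writes <dev_path> <log_dev_path> */
        dm_task_add_target(dmt, 0, nr_sectors, "log-writes", "/dev/sdb /dev/sdc");
        if (!dm_task_run(dmt))
                fprintf(stderr, "creating logwrites-test failed\n");
        dm_task_destroy(dmt);
        return 0;
}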

static int log_mark(struct log_writes_c *lc, char *data)
{
        struct pending_block *block;
        size_t maxsize = lc->sectorsize - sizeof(struct log_write_entry);

        block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
        if (!block) {
                DMERR("Error allocating pending block");
                return -ENOMEM;
        }

        block->data = kstrndup(data, maxsize, GFP_KERNEL);
        if (!block->data) {
                DMERR("Error copying mark data");
                kfree(block);
                return -ENOMEM;
        }
        atomic_inc(&lc->pending_blocks);
        block->datalen = strlen(block->data);
        block->flags |= LOG_MARK_FLAG;
        spin_lock_irq(&lc->blocks_lock);
        list_add_tail(&block->list, &lc->logging_blocks);
        spin_unlock_irq(&lc->blocks_lock);
        wake_up_process(lc->log_kthread);
        return 0;
}

static void log_writes_dtr(struct dm_target *ti)
{
        struct log_writes_c *lc = ti->private;

        spin_lock_irq(&lc->blocks_lock);
        list_splice_init(&lc->unflushed_blocks, &lc->logging_blocks);
        spin_unlock_irq(&lc->blocks_lock);

        /*
         * This is just nice to have since it'll update the super to include the
         * unflushed blocks, if it fails we don't really care.
         */
        log_mark(lc, "dm-log-writes-end");
        wake_up_process(lc->log_kthread);
        wait_event(lc->wait, !atomic_read(&lc->io_blocks) &&
                   !atomic_read(&lc->pending_blocks));
        kthread_stop(lc->log_kthread);

        WARN_ON(!list_empty(&lc->logging_blocks));
        WARN_ON(!list_empty(&lc->unflushed_blocks));
        dm_put_device(ti, lc->dev);
        dm_put_device(ti, lc->logdev);
        kfree(lc);
}

static void normal_map_bio(struct dm_target *ti, struct bio *bio)
{
        struct log_writes_c *lc = ti->private;

        bio->bi_bdev = lc->dev->bdev;
}

static int log_writes_map(struct dm_target *ti, struct bio *bio)
{
        struct log_writes_c *lc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
        struct pending_block *block;
        struct bvec_iter iter;
        struct bio_vec bv;
        size_t alloc_size;
        int i = 0;
        bool flush_bio = (bio->bi_rw & REQ_FLUSH);
        bool fua_bio = (bio->bi_rw & REQ_FUA);
        bool discard_bio = (bio->bi_rw & REQ_DISCARD);

        pb->block = NULL;

        /* Don't bother doing anything if logging has been disabled */
        if (!lc->logging_enabled)
                goto map_bio;

        /*
         * Map reads as normal.
         */
        if (bio_data_dir(bio) == READ)
                goto map_bio;

        /* No sectors and not a flush? Don't care */
        if (!bio_sectors(bio) && !flush_bio)
                goto map_bio;

        /*
         * Discards will have bi_size set but there's no actual data, so just
         * allocate the size of the pending block.
         */
        if (discard_bio)
                alloc_size = sizeof(struct pending_block);
        else
                alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio);

        block = kzalloc(alloc_size, GFP_NOIO);
        if (!block) {
                DMERR("Error allocating pending block");
                spin_lock_irq(&lc->blocks_lock);
                lc->logging_enabled = false;
                spin_unlock_irq(&lc->blocks_lock);
                return -ENOMEM;
        }
        INIT_LIST_HEAD(&block->list);
        pb->block = block;
        atomic_inc(&lc->pending_blocks);

        if (flush_bio)
                block->flags |= LOG_FLUSH_FLAG;
        if (fua_bio)
                block->flags |= LOG_FUA_FLAG;
        if (discard_bio)
                block->flags |= LOG_DISCARD_FLAG;

        block->sector = bio->bi_iter.bi_sector;
        block->nr_sectors = bio_sectors(bio);

        /* We don't need the data, just submit */
        if (discard_bio) {
                WARN_ON(flush_bio || fua_bio);
                if (lc->device_supports_discard)
                        goto map_bio;
                bio_endio(bio, 0);
                return DM_MAPIO_SUBMITTED;
        }

        /* Flush bio, splice the unflushed blocks onto this list and submit */
        if (flush_bio && !bio_sectors(bio)) {
                spin_lock_irq(&lc->blocks_lock);
                list_splice_init(&lc->unflushed_blocks, &block->list);
                spin_unlock_irq(&lc->blocks_lock);
                goto map_bio;
        }

        /*
         * We will write this bio somewhere else way later so we need to copy
         * the actual contents into new pages so we know the data will always be
         * there.
         *
         * We do this because this could be a bio from O_DIRECT in which case we
         * can't just hold onto the page until some later point, we have to
         * manually copy the contents.
         */
        bio_for_each_segment(bv, bio, iter) {
                struct page *page;
                void *src, *dst;

                page = alloc_page(GFP_NOIO);
                if (!page) {
                        DMERR("Error allocing page");
                        free_pending_block(lc, block);
                        spin_lock_irq(&lc->blocks_lock);
                        lc->logging_enabled = false;
                        spin_unlock_irq(&lc->blocks_lock);
                        return -ENOMEM;
                }

                src = kmap_atomic(bv.bv_page);
                dst = kmap_atomic(page);
                memcpy(dst, src + bv.bv_offset, bv.bv_len);
                kunmap_atomic(dst);
                kunmap_atomic(src);
                block->vecs[i].bv_page = page;
                block->vecs[i].bv_len = bv.bv_len;
                block->vec_cnt++;
                i++;
        }

        /* Had a flush with data in it, weird */
        if (flush_bio) {
                spin_lock_irq(&lc->blocks_lock);
                list_splice_init(&lc->unflushed_blocks, &block->list);
                spin_unlock_irq(&lc->blocks_lock);
        }
map_bio:
        normal_map_bio(ti, bio);
        return DM_MAPIO_REMAPPED;
}

static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
{
        struct log_writes_c *lc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));

        if (bio_data_dir(bio) == WRITE && pb->block) {
                struct pending_block *block = pb->block;
                unsigned long flags;

                spin_lock_irqsave(&lc->blocks_lock, flags);
                if (block->flags & LOG_FLUSH_FLAG) {
                        list_splice_tail_init(&block->list, &lc->logging_blocks);
                        list_add_tail(&block->list, &lc->logging_blocks);
                        wake_up_process(lc->log_kthread);
                } else if (block->flags & LOG_FUA_FLAG) {
                        list_add_tail(&block->list, &lc->logging_blocks);
                        wake_up_process(lc->log_kthread);
                } else
                        list_add_tail(&block->list, &lc->unflushed_blocks);
                spin_unlock_irqrestore(&lc->blocks_lock, flags);
        }

        return error;
}

/*
 * INFO format: <logged entries> <highest allocated sector>
 */
static void log_writes_status(struct dm_target *ti, status_type_t type,
                              unsigned status_flags, char *result,
                              unsigned maxlen)
{
        unsigned sz = 0;
        struct log_writes_c *lc = ti->private;

        switch (type) {
        case STATUSTYPE_INFO:
                DMEMIT("%llu %llu", lc->logged_entries,
                       (unsigned long long)lc->next_sector - 1);
                if (!lc->logging_enabled)
                        DMEMIT(" logging_disabled");
                break;

        case STATUSTYPE_TABLE:
                DMEMIT("%s %s", lc->dev->name, lc->logdev->name);
                break;
        }
}
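The INFO line above surfaces through the ordinary status ioctl, so userspace sees "<logged entries> <highest allocated sector>" as the params string. A minimal sketch of reading it with libdevmapper, assuming a device named "logwrites-test", could look like this:

#include <libdevmapper.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct dm_task *dmt = dm_task_create(DM_DEVICE_STATUS);
        uint64_t start, length;
        char *ttype = NULL, *params = NULL;

        if (!dmt)
                return 1;
        dm_task_set_name(dmt, "logwrites-test");
        if (dm_task_run(dmt)) {
                dm_get_next_target(dmt, NULL, &start, &length, &ttype, &params);
                /* params: "<logged entries> <highest allocated sector>" */
                printf("%s: %s\n", ttype, params);
        }
        dm_task_destroy(dmt);
        return 0;
}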

static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd,
                            unsigned long arg)
{
        struct log_writes_c *lc = ti->private;
        struct dm_dev *dev = lc->dev;
        int r = 0;

        /*
         * Only pass ioctls through if the device sizes match exactly.
         */
        if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
                r = scsi_verify_blk_ioctl(NULL, cmd);

        return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
}

static int log_writes_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
                            struct bio_vec *biovec, int max_size)
{
        struct log_writes_c *lc = ti->private;
        struct request_queue *q = bdev_get_queue(lc->dev->bdev);

        if (!q->merge_bvec_fn)
                return max_size;

        bvm->bi_bdev = lc->dev->bdev;
        bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);

        return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static int log_writes_iterate_devices(struct dm_target *ti,
                                      iterate_devices_callout_fn fn,
                                      void *data)
{
        struct log_writes_c *lc = ti->private;

        return fn(ti, lc->dev, 0, ti->len, data);
}

/*
 * Messages supported:
 *   mark <mark data> - specify the marked data.
 */
static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv)
{
        int r = -EINVAL;
        struct log_writes_c *lc = ti->private;

        if (argc != 2) {
                DMWARN("Invalid log-writes message arguments, expect 2 arguments, got %d", argc);
                return r;
        }

        if (!strcasecmp(argv[0], "mark"))
                r = log_mark(lc, argv[1]);
        else
                DMWARN("Unrecognised log writes target message received: %s", argv[0]);

        return r;
}
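The message handler expects exactly two words, "mark" plus the mark data, which is what the dmsetup message path delivers. A hedged userspace sketch of sending such a mark via libdevmapper (device name and mark string are placeholders):

#include <libdevmapper.h>
#include <stdio.h>

int main(void)
{
        struct dm_task *dmt = dm_task_create(DM_DEVICE_TARGET_MSG);

        if (!dmt)
                return 1;
        dm_task_set_name(dmt, "logwrites-test");
        dm_task_set_sector(dmt, 0);
        /* parsed by log_writes_message() as argv[0]="mark", argv[1]="pre-fsync" */
        dm_task_set_message(dmt, "mark pre-fsync");
        if (!dm_task_run(dmt))
                fprintf(stderr, "sending mark failed\n");
        dm_task_destroy(dmt);
        return 0;
}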

static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
        struct log_writes_c *lc = ti->private;
        struct request_queue *q = bdev_get_queue(lc->dev->bdev);

        if (!q || !blk_queue_discard(q)) {
                lc->device_supports_discard = false;
                limits->discard_granularity = 1 << SECTOR_SHIFT;
                limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
        }
}

static struct target_type log_writes_target = {
        .name   = "log-writes",
        .version = {1, 0, 0},
        .module = THIS_MODULE,
        .ctr    = log_writes_ctr,
        .dtr    = log_writes_dtr,
        .map    = log_writes_map,
        .end_io = normal_end_io,
        .status = log_writes_status,
        .ioctl  = log_writes_ioctl,
        .merge  = log_writes_merge,
        .message = log_writes_message,
        .iterate_devices = log_writes_iterate_devices,
        .io_hints = log_writes_io_hints,
};

static int __init dm_log_writes_init(void)
{
        int r = dm_register_target(&log_writes_target);

        if (r < 0)
                DMERR("register failed %d", r);

        return r;
}

static void __exit dm_log_writes_exit(void)
{
        dm_unregister_target(&log_writes_target);
}

module_init(dm_log_writes_init);
module_exit(dm_log_writes_exit);

MODULE_DESCRIPTION(DM_NAME " log writes target");
MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>");
MODULE_LICENSE("GPL");

@ -428,7 +428,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
|
||||
} else {
|
||||
/* blk-mq request-based interface */
|
||||
*__clone = blk_get_request(bdev_get_queue(bdev),
|
||||
rq_data_dir(rq), GFP_KERNEL);
|
||||
rq_data_dir(rq), GFP_ATOMIC);
|
||||
if (IS_ERR(*__clone))
|
||||
/* ENOMEM, requeue */
|
||||
return r;
|
||||
@ -1627,7 +1627,7 @@ static int __pgpath_busy(struct pgpath *pgpath)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
|
||||
|
||||
return dm_underlying_device_busy(q);
|
||||
return blk_lld_busy(q);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1703,7 +1703,7 @@ out:
|
||||
*---------------------------------------------------------------*/
|
||||
static struct target_type multipath_target = {
|
||||
.name = "multipath",
|
||||
.version = {1, 8, 0},
|
||||
.version = {1, 9, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = multipath_ctr,
|
||||
.dtr = multipath_dtr,
|
||||
|
@ -11,7 +11,7 @@
|
||||
struct dm_sysfs_attr {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct mapped_device *, char *);
|
||||
ssize_t (*store)(struct mapped_device *, char *);
|
||||
ssize_t (*store)(struct mapped_device *, const char *, size_t count);
|
||||
};
|
||||
|
||||
#define DM_ATTR_RO(_name) \
|
||||
@ -39,6 +39,31 @@ static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DM_ATTR_RW(_name) \
|
||||
struct dm_sysfs_attr dm_attr_##_name = \
|
||||
__ATTR(_name, S_IRUGO | S_IWUSR, dm_attr_##_name##_show, dm_attr_##_name##_store)
|
||||
|
||||
static ssize_t dm_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct dm_sysfs_attr *dm_attr;
|
||||
struct mapped_device *md;
|
||||
ssize_t ret;
|
||||
|
||||
dm_attr = container_of(attr, struct dm_sysfs_attr, attr);
|
||||
if (!dm_attr->store)
|
||||
return -EIO;
|
||||
|
||||
md = dm_get_from_kobject(kobj);
|
||||
if (!md)
|
||||
return -EINVAL;
|
||||
|
||||
ret = dm_attr->store(md, page, count);
|
||||
dm_put(md);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf)
|
||||
{
|
||||
if (dm_copy_name_and_uuid(md, buf, NULL))
|
||||
@ -64,25 +89,33 @@ static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
|
||||
return strlen(buf);
|
||||
}
|
||||
|
||||
static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf)
|
||||
{
|
||||
sprintf(buf, "%d\n", dm_use_blk_mq(md));
|
||||
|
||||
return strlen(buf);
|
||||
}
|
||||
|
||||
static DM_ATTR_RO(name);
|
||||
static DM_ATTR_RO(uuid);
|
||||
static DM_ATTR_RO(suspended);
|
||||
static DM_ATTR_RO(use_blk_mq);
|
||||
static DM_ATTR_RW(rq_based_seq_io_merge_deadline);
|
||||
|
||||
static struct attribute *dm_attrs[] = {
|
||||
&dm_attr_name.attr,
|
||||
&dm_attr_uuid.attr,
|
||||
&dm_attr_suspended.attr,
|
||||
&dm_attr_use_blk_mq.attr,
|
||||
&dm_attr_rq_based_seq_io_merge_deadline.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct sysfs_ops dm_sysfs_ops = {
|
||||
.show = dm_attr_show,
|
||||
.store = dm_attr_store,
|
||||
};
|
||||
|
||||
/*
|
||||
* dm kobject is embedded in mapped_device structure
|
||||
* no need to define release function here
|
||||
*/
|
||||
static struct kobj_type dm_ktype = {
|
||||
.sysfs_ops = &dm_sysfs_ops,
|
||||
.default_attrs = dm_attrs,
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mount.h>
|
||||
|
||||
#define DM_MSG_PREFIX "table"
|
||||
|
||||
@ -372,23 +374,18 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
|
||||
int r;
|
||||
dev_t uninitialized_var(dev);
|
||||
struct dm_dev_internal *dd;
|
||||
unsigned int major, minor;
|
||||
struct dm_table *t = ti->table;
|
||||
char dummy;
|
||||
struct block_device *bdev;
|
||||
|
||||
BUG_ON(!t);
|
||||
|
||||
if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
|
||||
/* Extract the major/minor numbers */
|
||||
dev = MKDEV(major, minor);
|
||||
if (MAJOR(dev) != major || MINOR(dev) != minor)
|
||||
return -EOVERFLOW;
|
||||
/* convert the path to a device */
|
||||
bdev = lookup_bdev(path);
|
||||
if (IS_ERR(bdev)) {
|
||||
dev = name_to_dev_t(path);
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
} else {
|
||||
/* convert the path to a device */
|
||||
struct block_device *bdev = lookup_bdev(path);
|
||||
|
||||
if (IS_ERR(bdev))
|
||||
return PTR_ERR(bdev);
|
||||
dev = bdev->bd_dev;
|
||||
bdput(bdev);
|
||||
}
|
||||
@ -939,7 +936,7 @@ bool dm_table_mq_request_based(struct dm_table *t)
|
||||
return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
|
||||
}
|
||||
|
||||
static int dm_table_alloc_md_mempools(struct dm_table *t)
|
||||
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
|
||||
{
|
||||
unsigned type = dm_table_get_type(t);
|
||||
unsigned per_bio_data_size = 0;
|
||||
@ -957,7 +954,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t)
|
||||
per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
|
||||
}
|
||||
|
||||
t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size);
|
||||
t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
|
||||
if (!t->mempools)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1127,7 +1124,7 @@ int dm_table_complete(struct dm_table *t)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_table_alloc_md_mempools(t);
|
||||
r = dm_table_alloc_md_mempools(t, t->md);
|
||||
if (r)
|
||||
DMERR("unable to allocate mempools");
|
||||
|
||||
@ -1339,14 +1336,14 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
|
||||
continue;
|
||||
|
||||
if (ti->flush_supported)
|
||||
return 1;
|
||||
return true;
|
||||
|
||||
if (ti->type->iterate_devices &&
|
||||
ti->type->iterate_devices(ti, device_flush_capable, &flush))
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool dm_table_discard_zeroes_data(struct dm_table *t)
|
||||
@ -1359,10 +1356,10 @@ static bool dm_table_discard_zeroes_data(struct dm_table *t)
|
||||
ti = dm_table_get_target(t, i++);
|
||||
|
||||
if (ti->discard_zeroes_data_unsupported)
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
|
||||
@ -1408,10 +1405,10 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
!ti->type->iterate_devices(ti, func, NULL))
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
@ -1468,14 +1465,14 @@ static bool dm_table_supports_discards(struct dm_table *t)
|
||||
continue;
|
||||
|
||||
if (ti->discards_supported)
|
||||
return 1;
|
||||
return true;
|
||||
|
||||
if (ti->type->iterate_devices &&
|
||||
ti->type->iterate_devices(ti, device_discard_capable, NULL))
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
@ -1677,20 +1674,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
|
||||
return r;
|
||||
}
|
||||
|
||||
int dm_table_any_busy_target(struct dm_table *t)
|
||||
{
|
||||
unsigned i;
|
||||
struct dm_target *ti;
|
||||
|
||||
for (i = 0; i < t->num_targets; i++) {
|
||||
ti = t->targets + i;
|
||||
if (ti->type->busy && ti->type->busy(ti))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct mapped_device *dm_table_get_md(struct dm_table *t)
|
||||
{
|
||||
return t->md;
|
||||
@ -1709,9 +1692,13 @@ void dm_table_run_md_queue_async(struct dm_table *t)
|
||||
md = dm_table_get_md(t);
|
||||
queue = dm_get_md_queue(md);
|
||||
if (queue) {
|
||||
spin_lock_irqsave(queue->queue_lock, flags);
|
||||
blk_run_queue_async(queue);
|
||||
spin_unlock_irqrestore(queue->queue_lock, flags);
|
||||
if (queue->mq_ops)
|
||||
blk_mq_run_hw_queues(queue, true);
|
||||
else {
|
||||
spin_lock_irqsave(queue->queue_lock, flags);
|
||||
blk_run_queue_async(queue);
|
||||
spin_unlock_irqrestore(queue->queue_lock, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(dm_table_run_md_queue_async);
|
||||
|
@ -18,20 +18,39 @@
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/device-mapper.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <crypto/hash.h>
|
||||
|
||||
#define DM_MSG_PREFIX "verity"
|
||||
|
||||
#define DM_VERITY_ENV_LENGTH 42
|
||||
#define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR"
|
||||
|
||||
#define DM_VERITY_IO_VEC_INLINE 16
|
||||
#define DM_VERITY_MEMPOOL_SIZE 4
|
||||
#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
|
||||
|
||||
#define DM_VERITY_MAX_LEVELS 63
|
||||
#define DM_VERITY_MAX_CORRUPTED_ERRS 100
|
||||
|
||||
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
|
||||
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
|
||||
|
||||
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
|
||||
|
||||
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
|
||||
|
||||
enum verity_mode {
|
||||
DM_VERITY_MODE_EIO,
|
||||
DM_VERITY_MODE_LOGGING,
|
||||
DM_VERITY_MODE_RESTART
|
||||
};
|
||||
|
||||
enum verity_block_type {
|
||||
DM_VERITY_BLOCK_TYPE_DATA,
|
||||
DM_VERITY_BLOCK_TYPE_METADATA
|
||||
};
|
||||
|
||||
struct dm_verity {
|
||||
struct dm_dev *data_dev;
|
||||
struct dm_dev *hash_dev;
|
||||
@ -54,6 +73,8 @@ struct dm_verity {
|
||||
unsigned digest_size; /* digest size for the current hash algorithm */
|
||||
unsigned shash_descsize;/* the size of temporary space for crypto */
|
||||
int hash_failed; /* set to 1 if hash of any block failed */
|
||||
enum verity_mode mode; /* mode for handling verification errors */
|
||||
unsigned corrupted_errs;/* Number of errors for corrupted blocks */
|
||||
|
||||
mempool_t *vec_mempool; /* mempool of bio vector */
|
||||
|
||||
@ -174,6 +195,57 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
|
||||
*offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle verification errors.
|
||||
*/
|
||||
static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
|
||||
unsigned long long block)
|
||||
{
|
||||
char verity_env[DM_VERITY_ENV_LENGTH];
|
||||
char *envp[] = { verity_env, NULL };
|
||||
const char *type_str = "";
|
||||
struct mapped_device *md = dm_table_get_md(v->ti->table);
|
||||
|
||||
/* Corruption should be visible in device status in all modes */
|
||||
v->hash_failed = 1;
|
||||
|
||||
if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
|
||||
goto out;
|
||||
|
||||
v->corrupted_errs++;
|
||||
|
||||
switch (type) {
|
||||
case DM_VERITY_BLOCK_TYPE_DATA:
|
||||
type_str = "data";
|
||||
break;
|
||||
case DM_VERITY_BLOCK_TYPE_METADATA:
|
||||
type_str = "metadata";
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
|
||||
block);
|
||||
|
||||
if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
|
||||
DMERR("%s: reached maximum errors", v->data_dev->name);
|
||||
|
||||
snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
|
||||
DM_VERITY_ENV_VAR_NAME, type, block);
|
||||
|
||||
kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
|
||||
|
||||
out:
|
||||
if (v->mode == DM_VERITY_MODE_LOGGING)
|
||||
return 0;
|
||||
|
||||
if (v->mode == DM_VERITY_MODE_RESTART)
|
||||
kernel_restart("dm-verity device corrupted");
|
||||
|
||||
return 1;
|
||||
}
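The uevent emitted above only puts DM_VERITY_ERR_BLOCK_NR ("<type>,<block>") into the event environment; how it is consumed is outside this patch. Purely as an illustration, a helper that a udev rule might run on the KOBJ_CHANGE event could parse it like this (the udev wiring itself is assumed, not shown here):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        /* udev exports uevent environment variables to RUN programs */
        const char *val = getenv("DM_VERITY_ERR_BLOCK_NR");
        int type;
        unsigned long long block;

        if (!val || sscanf(val, "%d,%llu", &type, &block) != 2)
                return 1;
        fprintf(stderr, "dm-verity corruption: type=%d block=%llu\n",
                type, block);
        return 0;
}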
|
||||
|
||||
/*
|
||||
* Verify hash of a metadata block pertaining to the specified data block
|
||||
* ("block" argument) at a specified level ("level" argument).
|
||||
@ -251,11 +323,11 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
|
||||
goto release_ret_r;
|
||||
}
|
||||
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
|
||||
DMERR_LIMIT("metadata block %llu is corrupted",
|
||||
(unsigned long long)hash_block);
|
||||
v->hash_failed = 1;
|
||||
r = -EIO;
|
||||
goto release_ret_r;
|
||||
if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
|
||||
hash_block)) {
|
||||
r = -EIO;
|
||||
goto release_ret_r;
|
||||
}
|
||||
} else
|
||||
aux->hash_verified = 1;
|
||||
}
|
||||
@ -367,10 +439,9 @@ test_block_hash:
|
||||
return r;
|
||||
}
|
||||
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
|
||||
DMERR_LIMIT("data block %llu is corrupted",
|
||||
(unsigned long long)(io->block + b));
|
||||
v->hash_failed = 1;
|
||||
return -EIO;
|
||||
if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
|
||||
io->block + b))
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
@ -546,6 +617,19 @@ static void verity_status(struct dm_target *ti, status_type_t type,
|
||||
else
|
||||
for (x = 0; x < v->salt_size; x++)
|
||||
DMEMIT("%02x", v->salt[x]);
|
||||
if (v->mode != DM_VERITY_MODE_EIO) {
|
||||
DMEMIT(" 1 ");
|
||||
switch (v->mode) {
|
||||
case DM_VERITY_MODE_LOGGING:
|
||||
DMEMIT(DM_VERITY_OPT_LOGGING);
|
||||
break;
|
||||
case DM_VERITY_MODE_RESTART:
|
||||
DMEMIT(DM_VERITY_OPT_RESTART);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -647,13 +731,19 @@ static void verity_dtr(struct dm_target *ti)
|
||||
static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
{
|
||||
struct dm_verity *v;
|
||||
unsigned num;
|
||||
struct dm_arg_set as;
|
||||
const char *opt_string;
|
||||
unsigned int num, opt_params;
|
||||
unsigned long long num_ll;
|
||||
int r;
|
||||
int i;
|
||||
sector_t hash_position;
|
||||
char dummy;
|
||||
|
||||
static struct dm_arg _args[] = {
|
||||
{0, 1, "Invalid number of feature args"},
|
||||
};
|
||||
|
||||
v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
|
||||
if (!v) {
|
||||
ti->error = "Cannot allocate verity structure";
|
||||
@ -668,8 +758,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
goto bad;
|
||||
}
|
||||
|
||||
if (argc != 10) {
|
||||
ti->error = "Invalid argument count: exactly 10 arguments required";
|
||||
if (argc < 10) {
|
||||
ti->error = "Not enough arguments";
|
||||
r = -EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
@ -790,6 +880,39 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
argv += 10;
|
||||
argc -= 10;
|
||||
|
||||
/* Optional parameters */
|
||||
if (argc) {
|
||||
as.argc = argc;
|
||||
as.argv = argv;
|
||||
|
||||
r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
|
||||
if (r)
|
||||
goto bad;
|
||||
|
||||
while (opt_params) {
|
||||
opt_params--;
|
||||
opt_string = dm_shift_arg(&as);
|
||||
if (!opt_string) {
|
||||
ti->error = "Not enough feature arguments";
|
||||
r = -EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
|
||||
v->mode = DM_VERITY_MODE_LOGGING;
|
||||
else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
|
||||
v->mode = DM_VERITY_MODE_RESTART;
|
||||
else {
|
||||
ti->error = "Invalid feature arguments";
|
||||
r = -EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
v->hash_per_block_bits =
|
||||
__fls((1 << v->hash_dev_block_bits) / v->digest_size);
|
||||
|
||||
|
drivers/md/dm.c
@ -21,6 +21,9 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/elevator.h> /* for rq_end_sector() */
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -216,8 +219,29 @@ struct mapped_device {
|
||||
|
||||
struct kthread_worker kworker;
|
||||
struct task_struct *kworker_task;
|
||||
|
||||
/* for request-based merge heuristic in dm_request_fn() */
|
||||
unsigned seq_rq_merge_deadline_usecs;
|
||||
int last_rq_rw;
|
||||
sector_t last_rq_pos;
|
||||
ktime_t last_rq_start_time;
|
||||
|
||||
/* for blk-mq request-based DM support */
|
||||
struct blk_mq_tag_set tag_set;
|
||||
bool use_blk_mq;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DM_MQ_DEFAULT
|
||||
static bool use_blk_mq = true;
|
||||
#else
|
||||
static bool use_blk_mq = false;
|
||||
#endif
|
||||
|
||||
bool dm_use_blk_mq(struct mapped_device *md)
|
||||
{
|
||||
return md->use_blk_mq;
|
||||
}
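Since use_blk_mq is both a dm_mod module parameter and, further below, a per-device read-only sysfs attribute, the opt-in can be inspected from userspace. A small illustrative sketch; the paths assume the usual /sys layout and "dm-0" stands in for whichever mapped device is of interest:

#include <stdio.h>

static void print_first_line(const char *path)
{
        char buf[64];
        FILE *f = fopen(path, "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("%s: %s", path, buf);
        if (f)
                fclose(f);
}

int main(void)
{
        print_first_line("/sys/module/dm_mod/parameters/use_blk_mq");
        print_first_line("/sys/block/dm-0/dm/use_blk_mq");
        return 0;
}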
|
||||
|
||||
/*
|
||||
* For mempools pre-allocation at the table loading time.
|
||||
*/
|
||||
@ -250,35 +274,35 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
|
||||
*/
|
||||
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
|
||||
|
||||
static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
|
||||
static unsigned __dm_get_module_param(unsigned *module_param,
|
||||
unsigned def, unsigned max)
|
||||
{
|
||||
unsigned ios = ACCESS_ONCE(*reserved_ios);
|
||||
unsigned modified_ios = 0;
|
||||
unsigned param = ACCESS_ONCE(*module_param);
|
||||
unsigned modified_param = 0;
|
||||
|
||||
if (!ios)
|
||||
modified_ios = def;
|
||||
else if (ios > max)
|
||||
modified_ios = max;
|
||||
if (!param)
|
||||
modified_param = def;
|
||||
else if (param > max)
|
||||
modified_param = max;
|
||||
|
||||
if (modified_ios) {
|
||||
(void)cmpxchg(reserved_ios, ios, modified_ios);
|
||||
ios = modified_ios;
|
||||
if (modified_param) {
|
||||
(void)cmpxchg(module_param, param, modified_param);
|
||||
param = modified_param;
|
||||
}
|
||||
|
||||
return ios;
|
||||
return param;
|
||||
}
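The helper above reads a writable module parameter once, clamps it to a sane range, and publishes the clamped value back with a single best-effort cmpxchg so concurrent readers converge on one value. A userspace analogue using C11 atomics, purely for illustration (the tunable name is made up):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned reserved_ios;   /* hypothetical tunable */

static unsigned get_bounded_param(_Atomic unsigned *param,
                                  unsigned def, unsigned max)
{
        unsigned val = atomic_load(param);
        unsigned fixed = 0;

        if (!val)
                fixed = def;
        else if (val > max)
                fixed = max;

        if (fixed) {
                /* Best effort, like the kernel's (void)cmpxchg(). */
                atomic_compare_exchange_strong(param, &val, fixed);
                val = fixed;
        }
        return val;
}

int main(void)
{
        atomic_store(&reserved_ios, 4096);
        printf("%u\n", get_bounded_param(&reserved_ios, 256, 1024)); /* prints 1024 */
        return 0;
}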
|
||||
|
||||
unsigned dm_get_reserved_bio_based_ios(void)
|
||||
{
|
||||
return __dm_get_reserved_ios(&reserved_bio_based_ios,
|
||||
return __dm_get_module_param(&reserved_bio_based_ios,
|
||||
RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
|
||||
|
||||
unsigned dm_get_reserved_rq_based_ios(void)
|
||||
{
|
||||
return __dm_get_reserved_ios(&reserved_rq_based_ios,
|
||||
return __dm_get_module_param(&reserved_rq_based_ios,
|
||||
RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
|
||||
@ -1017,6 +1041,11 @@ static void end_clone_bio(struct bio *clone, int error)
|
||||
blk_update_request(tio->orig, 0, nr_bytes);
|
||||
}
|
||||
|
||||
static struct dm_rq_target_io *tio_from_request(struct request *rq)
|
||||
{
|
||||
return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't touch any member of the md after calling this function because
|
||||
* the md may be freed in dm_put() at the end of this function.
|
||||
@ -1024,10 +1053,13 @@ static void end_clone_bio(struct bio *clone, int error)
|
||||
*/
|
||||
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
{
|
||||
int nr_requests_pending;
|
||||
|
||||
atomic_dec(&md->pending[rw]);
|
||||
|
||||
/* nudge anyone waiting on suspend queue */
|
||||
if (!md_in_flight(md))
|
||||
nr_requests_pending = md_in_flight(md);
|
||||
if (!nr_requests_pending)
|
||||
wake_up(&md->wait);
|
||||
|
||||
/*
|
||||
@ -1036,8 +1068,13 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
* back into ->request_fn() could deadlock attempting to grab the
|
||||
* queue lock again.
|
||||
*/
|
||||
if (run_queue)
|
||||
blk_run_queue_async(md->queue);
|
||||
if (run_queue) {
|
||||
if (md->queue->mq_ops)
|
||||
blk_mq_run_hw_queues(md->queue, true);
|
||||
else if (!nr_requests_pending ||
|
||||
(nr_requests_pending >= md->queue->nr_congestion_on))
|
||||
blk_run_queue_async(md->queue);
|
||||
}
|
||||
|
||||
/*
|
||||
* dm_put() must be at the end of this function. See the comment above
|
||||
@ -1048,13 +1085,18 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
static void free_rq_clone(struct request *clone)
|
||||
{
|
||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||
struct mapped_device *md = tio->md;
|
||||
|
||||
blk_rq_unprep_clone(clone);
|
||||
if (clone->q && clone->q->mq_ops)
|
||||
|
||||
if (clone->q->mq_ops)
|
||||
tio->ti->type->release_clone_rq(clone);
|
||||
else
|
||||
free_clone_request(tio->md, clone);
|
||||
free_rq_tio(tio);
|
||||
else if (!md->queue->mq_ops)
|
||||
/* request_fn queue stacked on request_fn queue(s) */
|
||||
free_clone_request(md, clone);
|
||||
|
||||
if (!md->queue->mq_ops)
|
||||
free_rq_tio(tio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1083,17 +1125,22 @@ static void dm_end_request(struct request *clone, int error)
|
||||
}
|
||||
|
||||
free_rq_clone(clone);
|
||||
blk_end_request_all(rq, error);
|
||||
if (!rq->q->mq_ops)
|
||||
blk_end_request_all(rq, error);
|
||||
else
|
||||
blk_mq_end_request(rq, error);
|
||||
rq_completed(md, rw, true);
|
||||
}
|
||||
|
||||
static void dm_unprep_request(struct request *rq)
|
||||
{
|
||||
struct dm_rq_target_io *tio = rq->special;
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
struct request *clone = tio->clone;
|
||||
|
||||
rq->special = NULL;
|
||||
rq->cmd_flags &= ~REQ_DONTPREP;
|
||||
if (!rq->q->mq_ops) {
|
||||
rq->special = NULL;
|
||||
rq->cmd_flags &= ~REQ_DONTPREP;
|
||||
}
|
||||
|
||||
if (clone)
|
||||
free_rq_clone(clone);
|
||||
@ -1102,18 +1149,29 @@ static void dm_unprep_request(struct request *rq)
|
||||
/*
|
||||
* Requeue the original request of a clone.
|
||||
*/
|
||||
static void dm_requeue_unmapped_original_request(struct mapped_device *md,
|
||||
struct request *rq)
|
||||
static void old_requeue_request(struct request *rq)
|
||||
{
|
||||
int rw = rq_data_dir(rq);
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned long flags;
|
||||
|
||||
dm_unprep_request(rq);
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
blk_requeue_request(q, rq);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void dm_requeue_unmapped_original_request(struct mapped_device *md,
|
||||
struct request *rq)
|
||||
{
|
||||
int rw = rq_data_dir(rq);
|
||||
|
||||
dm_unprep_request(rq);
|
||||
|
||||
if (!rq->q->mq_ops)
|
||||
old_requeue_request(rq);
|
||||
else {
|
||||
blk_mq_requeue_request(rq);
|
||||
blk_mq_kick_requeue_list(rq->q);
|
||||
}
|
||||
|
||||
rq_completed(md, rw, false);
|
||||
}
|
||||
@ -1125,33 +1183,42 @@ static void dm_requeue_unmapped_request(struct request *clone)
|
||||
dm_requeue_unmapped_original_request(tio->md, tio->orig);
|
||||
}
|
||||
|
||||
static void __stop_queue(struct request_queue *q)
|
||||
static void old_stop_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (blk_queue_stopped(q))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
blk_stop_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void stop_queue(struct request_queue *q)
|
||||
{
|
||||
if (!q->mq_ops)
|
||||
old_stop_queue(q);
|
||||
else
|
||||
blk_mq_stop_hw_queues(q);
|
||||
}
|
||||
|
||||
static void old_start_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
__stop_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void __start_queue(struct request_queue *q)
|
||||
{
|
||||
if (blk_queue_stopped(q))
|
||||
blk_start_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
static void start_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
__start_queue(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
if (!q->mq_ops)
|
||||
old_start_queue(q);
|
||||
else
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
}
|
||||
|
||||
static void dm_done(struct request *clone, int error, bool mapped)
|
||||
@ -1192,13 +1259,20 @@ static void dm_done(struct request *clone, int error, bool mapped)
|
||||
static void dm_softirq_done(struct request *rq)
|
||||
{
|
||||
bool mapped = true;
|
||||
struct dm_rq_target_io *tio = rq->special;
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
struct request *clone = tio->clone;
|
||||
int rw;
|
||||
|
||||
if (!clone) {
|
||||
blk_end_request_all(rq, tio->error);
|
||||
rq_completed(tio->md, rq_data_dir(rq), false);
|
||||
free_rq_tio(tio);
|
||||
rw = rq_data_dir(rq);
|
||||
if (!rq->q->mq_ops) {
|
||||
blk_end_request_all(rq, tio->error);
|
||||
rq_completed(tio->md, rw, false);
|
||||
free_rq_tio(tio);
|
||||
} else {
|
||||
blk_mq_end_request(rq, tio->error);
|
||||
rq_completed(tio->md, rw, false);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1214,7 +1288,7 @@ static void dm_softirq_done(struct request *rq)
|
||||
*/
|
||||
static void dm_complete_request(struct request *rq, int error)
|
||||
{
|
||||
struct dm_rq_target_io *tio = rq->special;
|
||||
struct dm_rq_target_io *tio = tio_from_request(rq);
|
||||
|
||||
tio->error = error;
|
||||
blk_complete_request(rq);
|
||||
@ -1233,7 +1307,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with the clone's queue lock held
|
||||
* Called with the clone's queue lock held (for non-blk-mq)
|
||||
*/
|
||||
static void end_clone_request(struct request *clone, int error)
|
||||
{
|
||||
@ -1693,7 +1767,7 @@ out:
|
||||
* The request function that just remaps the bio built up by
|
||||
* dm_merge_bvec.
|
||||
*/
|
||||
static void _dm_request(struct request_queue *q, struct bio *bio)
|
||||
static void dm_make_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
int rw = bio_data_dir(bio);
|
||||
struct mapped_device *md = q->queuedata;
|
||||
@ -1725,16 +1799,6 @@ int dm_request_based(struct mapped_device *md)
|
||||
return blk_queue_stackable(md->queue);
|
||||
}
|
||||
|
||||
static void dm_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct mapped_device *md = q->queuedata;
|
||||
|
||||
if (dm_request_based(md))
|
||||
blk_queue_bio(q, bio);
|
||||
else
|
||||
_dm_request(q, bio);
|
||||
}
|
||||
|
||||
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
|
||||
{
|
||||
int r;
|
||||
@ -1787,15 +1851,25 @@ static int setup_clone(struct request *clone, struct request *rq,
|
||||
static struct request *clone_rq(struct request *rq, struct mapped_device *md,
|
||||
struct dm_rq_target_io *tio, gfp_t gfp_mask)
|
||||
{
|
||||
struct request *clone = alloc_clone_request(md, gfp_mask);
|
||||
/*
|
||||
* Do not allocate a clone if tio->clone was already set
|
||||
* (see: dm_mq_queue_rq).
|
||||
*/
|
||||
bool alloc_clone = !tio->clone;
|
||||
struct request *clone;
|
||||
|
||||
if (!clone)
|
||||
return NULL;
|
||||
if (alloc_clone) {
|
||||
clone = alloc_clone_request(md, gfp_mask);
|
||||
if (!clone)
|
||||
return NULL;
|
||||
} else
|
||||
clone = tio->clone;
|
||||
|
||||
blk_rq_init(NULL, clone);
|
||||
if (setup_clone(clone, rq, tio, gfp_mask)) {
|
||||
/* -ENOMEM */
|
||||
free_clone_request(md, clone);
|
||||
if (alloc_clone)
|
||||
free_clone_request(md, clone);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1804,6 +1878,19 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
|
||||
|
||||
static void map_tio_request(struct kthread_work *work);
|
||||
|
||||
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
|
||||
struct mapped_device *md)
|
||||
{
|
||||
tio->md = md;
|
||||
tio->ti = NULL;
|
||||
tio->clone = NULL;
|
||||
tio->orig = rq;
|
||||
tio->error = 0;
|
||||
memset(&tio->info, 0, sizeof(tio->info));
|
||||
if (md->kworker_task)
|
||||
init_kthread_work(&tio->work, map_tio_request);
|
||||
}
|
||||
|
||||
static struct dm_rq_target_io *prep_tio(struct request *rq,
|
||||
struct mapped_device *md, gfp_t gfp_mask)
|
||||
{
|
||||
@ -1815,13 +1902,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
|
||||
if (!tio)
|
||||
return NULL;
|
||||
|
||||
tio->md = md;
|
||||
tio->ti = NULL;
|
||||
tio->clone = NULL;
|
||||
tio->orig = rq;
|
||||
tio->error = 0;
|
||||
memset(&tio->info, 0, sizeof(tio->info));
|
||||
init_kthread_work(&tio->work, map_tio_request);
|
||||
init_tio(tio, rq, md);
|
||||
|
||||
table = dm_get_live_table(md, &srcu_idx);
|
||||
if (!dm_table_mq_request_based(table)) {
|
||||
@ -1865,11 +1946,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
|
||||
* DM_MAPIO_REQUEUE : the original request needs to be requeued
|
||||
* < 0 : the request was completed due to failure
|
||||
*/
|
||||
static int map_request(struct dm_target *ti, struct request *rq,
|
||||
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
|
||||
struct mapped_device *md)
|
||||
{
|
||||
int r;
|
||||
struct dm_rq_target_io *tio = rq->special;
|
||||
struct dm_target *ti = tio->ti;
|
||||
struct request *clone = NULL;
|
||||
|
||||
if (tio->clone) {
|
||||
@ -1884,7 +1965,7 @@ static int map_request(struct dm_target *ti, struct request *rq,
|
||||
}
|
||||
if (IS_ERR(clone))
|
||||
return DM_MAPIO_REQUEUE;
|
||||
if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
|
||||
if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
|
||||
/* -ENOMEM */
|
||||
ti->type->release_clone_rq(clone);
|
||||
return DM_MAPIO_REQUEUE;
|
||||
@ -1925,15 +2006,24 @@ static void map_tio_request(struct kthread_work *work)
|
||||
struct request *rq = tio->orig;
|
||||
struct mapped_device *md = tio->md;
|
||||
|
||||
if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
|
||||
if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
|
||||
dm_requeue_unmapped_original_request(md, rq);
|
||||
}
|
||||
|
||||
static void dm_start_request(struct mapped_device *md, struct request *orig)
|
||||
{
|
||||
blk_start_request(orig);
|
||||
if (!orig->q->mq_ops)
|
||||
blk_start_request(orig);
|
||||
else
|
||||
blk_mq_start_request(orig);
|
||||
atomic_inc(&md->pending[rq_data_dir(orig)]);
|
||||
|
||||
if (md->seq_rq_merge_deadline_usecs) {
|
||||
md->last_rq_pos = rq_end_sector(orig);
|
||||
md->last_rq_rw = rq_data_dir(orig);
|
||||
md->last_rq_start_time = ktime_get();
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold the md reference here for the in-flight I/O.
|
||||
* We can't rely on the reference count by device opener,
|
||||
@ -1944,6 +2034,45 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
|
||||
dm_get(md);
|
||||
}
|
||||
|
||||
#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
|
||||
|
||||
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
|
||||
}
|
||||
|
||||
ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
unsigned deadline;
|
||||
|
||||
if (!dm_request_based(md) || md->use_blk_mq)
|
||||
return count;
|
||||
|
||||
if (kstrtouint(buf, 10, &deadline))
|
||||
return -EINVAL;
|
||||
|
||||
if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
|
||||
deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
|
||||
|
||||
md->seq_rq_merge_deadline_usecs = deadline;
|
||||
|
||||
return count;
|
||||
}
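The new store hook makes the merge deadline tunable per device through the writable sysfs attribute added in dm-sysfs.c. An illustrative write from userspace; "dm-0" and the 100 microsecond value are placeholders, and the kernel caps whatever is written at MAX_SEQ_RQ_MERGE_DEADLINE_USECS:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *path =
                "/sys/block/dm-0/dm/rq_based_seq_io_merge_deadline";
        const char *usecs = "100\n";    /* deadline in microseconds */
        int fd = open(path, O_WRONLY);

        if (fd < 0 || write(fd, usecs, strlen(usecs)) < 0)
                perror(path);
        if (fd >= 0)
                close(fd);
        return 0;
}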
|
||||
|
||||
static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
|
||||
{
|
||||
ktime_t kt_deadline;
|
||||
|
||||
if (!md->seq_rq_merge_deadline_usecs)
|
||||
return false;
|
||||
|
||||
kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
|
||||
kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
|
||||
|
||||
return !ktime_after(ktime_get(), kt_deadline);
|
||||
}
|
||||
|
||||
/*
|
||||
* q->request_fn for request-based dm.
|
||||
* Called with the queue lock held.
|
||||
@ -1967,7 +2096,7 @@ static void dm_request_fn(struct request_queue *q)
|
||||
while (!blk_queue_stopped(q)) {
|
||||
rq = blk_peek_request(q);
|
||||
if (!rq)
|
||||
goto delay_and_out;
|
||||
goto out;
|
||||
|
||||
/* always use block 0 to find the target for flushes for now */
|
||||
pos = 0;
|
||||
@ -1986,12 +2115,17 @@ static void dm_request_fn(struct request_queue *q)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dm_request_peeked_before_merge_deadline(md) &&
|
||||
md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
|
||||
md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
|
||||
goto delay_and_out;
|
||||
|
||||
if (ti->type->busy && ti->type->busy(ti))
|
||||
goto delay_and_out;
|
||||
|
||||
dm_start_request(md, rq);
|
||||
|
||||
tio = rq->special;
|
||||
tio = tio_from_request(rq);
|
||||
/* Establish tio->ti before queuing work (map_tio_request) */
|
||||
tio->ti = ti;
|
||||
queue_kthread_work(&md->kworker, &tio->work);
|
||||
@ -2001,33 +2135,11 @@ static void dm_request_fn(struct request_queue *q)
|
||||
goto out;
|
||||
|
||||
delay_and_out:
|
||||
blk_delay_queue(q, HZ / 10);
|
||||
blk_delay_queue(q, HZ / 100);
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
}
|
||||
|
||||
int dm_underlying_device_busy(struct request_queue *q)
|
||||
{
|
||||
return blk_lld_busy(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
|
||||
|
||||
static int dm_lld_busy(struct request_queue *q)
|
||||
{
|
||||
int r;
|
||||
struct mapped_device *md = q->queuedata;
|
||||
struct dm_table *map = dm_get_live_table_fast(md);
|
||||
|
||||
if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
|
||||
r = 1;
|
||||
else
|
||||
r = dm_table_any_busy_target(map);
|
||||
|
||||
dm_put_live_table_fast(md);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int dm_any_congested(void *congested_data, int bdi_bits)
|
||||
{
|
||||
int r = bdi_bits;
|
||||
@ -2110,7 +2222,7 @@ static void dm_init_md_queue(struct mapped_device *md)
|
||||
{
|
||||
/*
|
||||
* Request-based dm devices cannot be stacked on top of bio-based dm
|
||||
* devices. The type of this dm device has not been decided yet.
|
||||
* devices. The type of this dm device may not have been decided yet.
|
||||
* The type is decided at the first table loading time.
|
||||
* To prevent problematic device stacking, clear the queue flag
|
||||
* for request stacking support until then.
|
||||
@ -2118,13 +2230,21 @@ static void dm_init_md_queue(struct mapped_device *md)
|
||||
* This queue is new, so no concurrency on the queue_flags.
|
||||
*/
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
|
||||
}
|
||||
|
||||
static void dm_init_old_md_queue(struct mapped_device *md)
|
||||
{
|
||||
md->use_blk_mq = false;
|
||||
dm_init_md_queue(md);
|
||||
|
||||
/*
|
||||
* Initialize aspects of queue that aren't relevant for blk-mq
|
||||
*/
|
||||
md->queue->queuedata = md;
|
||||
md->queue->backing_dev_info.congested_fn = dm_any_congested;
|
||||
md->queue->backing_dev_info.congested_data = md;
|
||||
blk_queue_make_request(md->queue, dm_request);
|
||||
|
||||
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
|
||||
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2156,6 +2276,7 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
if (r < 0)
|
||||
goto bad_io_barrier;
|
||||
|
||||
md->use_blk_mq = use_blk_mq;
|
||||
md->type = DM_TYPE_NONE;
|
||||
mutex_init(&md->suspend_lock);
|
||||
mutex_init(&md->type_lock);
|
||||
@ -2267,6 +2388,8 @@ static void free_dev(struct mapped_device *md)
|
||||
del_gendisk(md->disk);
|
||||
put_disk(md->disk);
|
||||
blk_cleanup_queue(md->queue);
|
||||
if (md->use_blk_mq)
|
||||
blk_mq_free_tag_set(&md->tag_set);
|
||||
bdput(md->bdev);
|
||||
free_minor(minor);
|
||||
|
||||
@ -2278,7 +2401,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
|
||||
{
|
||||
struct dm_md_mempools *p = dm_table_get_md_mempools(t);
|
||||
|
||||
if (md->io_pool && md->bs) {
|
||||
if (md->bs) {
|
||||
/* The md already has necessary mempools. */
|
||||
if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
|
||||
/*
|
||||
@ -2310,7 +2433,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
|
||||
p->bs = NULL;
|
||||
|
||||
out:
|
||||
/* mempool bind completed, now no need any mempools in the table */
|
||||
/* mempool bind completed, no longer need any mempools in the table */
|
||||
dm_table_free_md_mempools(t);
|
||||
}
|
||||
|
||||
@ -2357,7 +2480,7 @@ int dm_queue_merge_is_compulsory(struct request_queue *q)
|
||||
if (!q->merge_bvec_fn)
|
||||
return 0;
|
||||
|
||||
if (q->make_request_fn == dm_request) {
|
||||
if (q->make_request_fn == dm_make_request) {
|
||||
dev_md = q->queuedata;
|
||||
if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
|
||||
return 0;
|
||||
@ -2426,7 +2549,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
|
||||
* This must be done before setting the queue restrictions,
|
||||
* because request-based dm may be run just after the setting.
|
||||
*/
|
||||
if (dm_table_request_based(t) && !blk_queue_stopped(q))
|
||||
if (dm_table_request_based(t))
|
||||
stop_queue(q);
|
||||
|
||||
__bind_mempools(md, t);
|
||||
@ -2508,14 +2631,6 @@ unsigned dm_get_md_type(struct mapped_device *md)
|
||||
return md->type;
|
||||
}
|
||||
|
||||
static bool dm_md_type_request_based(struct mapped_device *md)
|
||||
{
|
||||
unsigned table_type = dm_get_md_type(md);
|
||||
|
||||
return (table_type == DM_TYPE_REQUEST_BASED ||
|
||||
table_type == DM_TYPE_MQ_REQUEST_BASED);
|
||||
}
|
||||
|
||||
struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
|
||||
{
|
||||
return md->immutable_target_type;
|
||||
@ -2532,6 +2647,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
|
||||
|
||||
static void init_rq_based_worker_thread(struct mapped_device *md)
|
||||
{
|
||||
/* Initialize the request-based DM worker thread */
|
||||
init_kthread_worker(&md->kworker);
|
||||
md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
|
||||
"kdmwork-%s", dm_device_name(md));
|
||||
}
|
||||
|
||||
/*
|
||||
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
|
||||
*/
|
||||
@ -2540,27 +2663,160 @@ static int dm_init_request_based_queue(struct mapped_device *md)
|
||||
struct request_queue *q = NULL;
|
||||
|
||||
if (md->queue->elevator)
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
/* Fully initialize the queue */
|
||||
q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
|
||||
if (!q)
|
||||
return 0;
|
||||
return -EINVAL;
|
||||
|
||||
/* disable dm_request_fn's merge heuristic by default */
|
||||
md->seq_rq_merge_deadline_usecs = 0;
|
||||
|
||||
md->queue = q;
|
||||
dm_init_md_queue(md);
|
||||
dm_init_old_md_queue(md);
|
||||
blk_queue_softirq_done(md->queue, dm_softirq_done);
|
||||
blk_queue_prep_rq(md->queue, dm_prep_fn);
|
||||
blk_queue_lld_busy(md->queue, dm_lld_busy);
|
||||
|
||||
/* Also initialize the request-based DM worker thread */
|
||||
init_kthread_worker(&md->kworker);
|
||||
md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
|
||||
"kdmwork-%s", dm_device_name(md));
|
||||
init_rq_based_worker_thread(md);
|
||||
|
||||
elv_register_queue(md->queue);
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dm_mq_init_request(void *data, struct request *rq,
|
||||
unsigned int hctx_idx, unsigned int request_idx,
|
||||
unsigned int numa_node)
|
||||
{
|
||||
struct mapped_device *md = data;
|
||||
struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
|
||||
|
||||
/*
|
||||
* Must initialize md member of tio, otherwise it won't
|
||||
* be available in dm_mq_queue_rq.
|
||||
*/
|
||||
tio->md = md;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *rq = bd->rq;
|
||||
struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
|
||||
struct mapped_device *md = tio->md;
|
||||
int srcu_idx;
|
||||
struct dm_table *map = dm_get_live_table(md, &srcu_idx);
|
||||
struct dm_target *ti;
|
||||
sector_t pos;
|
||||
|
||||
/* always use block 0 to find the target for flushes for now */
|
||||
pos = 0;
|
||||
if (!(rq->cmd_flags & REQ_FLUSH))
|
||||
pos = blk_rq_pos(rq);
|
||||
|
||||
ti = dm_table_find_target(map, pos);
|
||||
if (!dm_target_is_valid(ti)) {
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
DMERR_LIMIT("request attempted access beyond the end of device");
|
||||
/*
|
||||
* Must perform setup, that rq_completed() requires,
|
||||
* before returning BLK_MQ_RQ_QUEUE_ERROR
|
||||
*/
|
||||
dm_start_request(md, rq);
|
||||
return BLK_MQ_RQ_QUEUE_ERROR;
|
||||
}
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
if (ti->type->busy && ti->type->busy(ti))
|
||||
return BLK_MQ_RQ_QUEUE_BUSY;
|
||||
|
||||
dm_start_request(md, rq);
|
||||
|
||||
/* Init tio using md established in .init_request */
|
||||
init_tio(tio, rq, md);
|
||||
|
||||
/*
|
||||
* Establish tio->ti before queuing work (map_tio_request)
|
||||
* or making direct call to map_request().
|
||||
*/
|
||||
tio->ti = ti;
|
||||
|
||||
/* Clone the request if underlying devices aren't blk-mq */
|
||||
if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
|
||||
/* clone request is allocated at the end of the pdu */
|
||||
tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
|
||||
if (!clone_rq(rq, md, tio, GFP_ATOMIC))
|
||||
return BLK_MQ_RQ_QUEUE_BUSY;
|
||||
queue_kthread_work(&md->kworker, &tio->work);
|
||||
} else {
|
||||
/* Direct call is fine since .queue_rq allows allocations */
|
||||
if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
|
||||
dm_requeue_unmapped_original_request(md, rq);
|
||||
}
|
||||
|
||||
return BLK_MQ_RQ_QUEUE_OK;
|
||||
}
|
||||
|
||||
static struct blk_mq_ops dm_mq_ops = {
|
||||
.queue_rq = dm_mq_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.complete = dm_softirq_done,
|
||||
.init_request = dm_mq_init_request,
|
||||
};
|
||||
|
||||
static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
|
||||
{
|
||||
unsigned md_type = dm_get_md_type(md);
|
||||
struct request_queue *q;
|
||||
int err;
|
||||
|
||||
memset(&md->tag_set, 0, sizeof(md->tag_set));
|
||||
md->tag_set.ops = &dm_mq_ops;
|
||||
md->tag_set.queue_depth = BLKDEV_MAX_RQ;
|
||||
md->tag_set.numa_node = NUMA_NO_NODE;
|
||||
md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
|
||||
md->tag_set.nr_hw_queues = 1;
|
||||
if (md_type == DM_TYPE_REQUEST_BASED) {
|
||||
/* make the memory for non-blk-mq clone part of the pdu */
|
||||
md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
|
||||
} else
|
||||
md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
|
||||
md->tag_set.driver_data = md;
|
||||
|
||||
err = blk_mq_alloc_tag_set(&md->tag_set);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
goto out_tag_set;
|
||||
}
|
||||
md->queue = q;
|
||||
dm_init_md_queue(md);
|
||||
|
||||
/* backfill 'mq' sysfs registration normally done in blk_register_queue */
|
||||
blk_mq_register_disk(md->disk);
|
||||
|
||||
if (md_type == DM_TYPE_REQUEST_BASED)
|
||||
init_rq_based_worker_thread(md);
|
||||
|
||||
return 0;
|
||||
|
||||
out_tag_set:
|
||||
blk_mq_free_tag_set(&md->tag_set);
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned filter_md_type(unsigned type, struct mapped_device *md)
|
||||
{
|
||||
if (type == DM_TYPE_BIO_BASED)
|
||||
return type;
|
||||
|
||||
return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2568,9 +2824,29 @@ static int dm_init_request_based_queue(struct mapped_device *md)
|
||||
*/
|
||||
int dm_setup_md_queue(struct mapped_device *md)
|
||||
{
|
||||
if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
|
||||
DMWARN("Cannot initialize queue for request-based mapped device");
|
||||
return -EINVAL;
|
||||
int r;
|
||||
unsigned md_type = filter_md_type(dm_get_md_type(md), md);
|
||||
|
||||
switch (md_type) {
|
||||
case DM_TYPE_REQUEST_BASED:
|
||||
r = dm_init_request_based_queue(md);
|
||||
if (r) {
|
||||
DMWARN("Cannot initialize queue for request-based mapped device");
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case DM_TYPE_MQ_REQUEST_BASED:
|
||||
r = dm_init_request_based_blk_mq_queue(md);
|
||||
if (r) {
|
||||
DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case DM_TYPE_BIO_BASED:
|
||||
dm_init_old_md_queue(md);
|
||||
blk_queue_make_request(md->queue, dm_make_request);
|
||||
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -2654,7 +2930,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
 
-	if (dm_request_based(md))
+	if (dm_request_based(md) && md->kworker_task)
 		flush_kthread_worker(&md->kworker);
 
 	/*
@@ -2908,7 +3184,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 */
 	if (dm_request_based(md)) {
 		stop_queue(md->queue);
-		flush_kthread_worker(&md->kworker);
+		if (md->kworker_task)
+			flush_kthread_worker(&md->kworker);
 	}
 
 	flush_workqueue(md->wq);
@@ -3206,6 +3483,7 @@ struct gendisk *dm_disk(struct mapped_device *md)
 {
 	return md->disk;
 }
+EXPORT_SYMBOL_GPL(dm_disk);
 
 struct kobject *dm_kobject(struct mapped_device *md)
 {
@@ -3253,16 +3531,19 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
+struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
+					    unsigned integrity, unsigned per_bio_data_size)
 {
 	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
-	struct kmem_cache *cachep;
-	unsigned int pool_size;
+	struct kmem_cache *cachep = NULL;
+	unsigned int pool_size = 0;
 	unsigned int front_pad;
 
 	if (!pools)
 		return NULL;
 
+	type = filter_md_type(type, md);
+
 	switch (type) {
 	case DM_TYPE_BIO_BASED:
 		cachep = _io_cache;
@@ -3270,13 +3551,13 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 		break;
 	case DM_TYPE_REQUEST_BASED:
 		cachep = _rq_tio_cache;
 		pool_size = dm_get_reserved_rq_based_ios();
 		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
 		if (!pools->rq_pool)
 			goto out;
 		/* fall through to setup remaining rq-based pools */
 	case DM_TYPE_MQ_REQUEST_BASED:
-		cachep = _rq_tio_cache;
-		pool_size = dm_get_reserved_rq_based_ios();
+		if (!pool_size)
+			pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
@@ -3284,12 +3565,14 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 		WARN_ON(per_bio_data_size != 0);
 		break;
 	default:
-		goto out;
+		BUG();
 	}
 
-	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
-	if (!pools->io_pool)
-		goto out;
+	if (cachep) {
+		pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
+		if (!pools->io_pool)
+			goto out;
+	}
 
 	pools->bs = bioset_create_nobvec(pool_size, front_pad);
 	if (!pools->bs)
@@ -3346,6 +3629,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
 module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
 
+module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -70,7 +70,6 @@ void dm_table_presuspend_undo_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
-int dm_table_any_busy_target(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
@@ -212,6 +211,8 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 void dm_internal_suspend(struct mapped_device *md);
 void dm_internal_resume(struct mapped_device *md);
 
+bool dm_use_blk_mq(struct mapped_device *md);
+
 int dm_io_init(void);
 void dm_io_exit(void);
 
@@ -221,7 +222,8 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
+struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
+					    unsigned integrity, unsigned per_bio_data_size);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 /*
@@ -235,4 +237,8 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen
 	return !maxlen || strlen(result) + 1 >= maxlen;
 }
 
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+						     const char *buf, size_t count);
+
 #endif
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -605,9 +605,4 @@ static inline unsigned long to_bytes(sector_t n)
 	return (n << SECTOR_SHIFT);
 }
 
-/*-----------------------------------------------------------------
- * Helper for block layer and dm core operations
- *---------------------------------------------------------------*/
-int dm_underlying_device_busy(struct request_queue *q);
-
 #endif	/* _LINUX_DEVICE_MAPPER_H */
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -92,6 +92,6 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
-extern dev_t name_to_dev_t(char *name);
+extern dev_t name_to_dev_t(const char *name);
 
 #endif /* _LINUX_MOUNT_H */
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	30
+#define DM_VERSION_MINOR	31
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2014-12-22)"
+#define DM_VERSION_EXTRA	"-ioctl (2015-3-12)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -207,7 +207,7 @@ done:
 *	bangs.
 */
 
-dev_t name_to_dev_t(char *name)
+dev_t name_to_dev_t(const char *name)
 {
 	char s[32];
 	char *p;
@@ -226,8 +226,9 @@ dev_t name_to_dev_t(char *name)
 
 	if (strncmp(name, "/dev/", 5) != 0) {
 		unsigned maj, min;
+		char dummy;
 
-		if (sscanf(name, "%u:%u", &maj, &min) == 2) {
+		if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) {
 			res = MKDEV(maj, min);
 			if (maj != MAJOR(res) || min != MINOR(res))
 				goto fail;
@@ -286,6 +287,7 @@ fail:
 done:
 	return res;
 }
+EXPORT_SYMBOL_GPL(name_to_dev_t);
 
 static int __init root_dev_setup(char *line)
 {