2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Internal header file for device mapper
|
|
|
|
*
|
|
|
|
* Copyright (C) 2001, 2002 Sistina Software
|
2006-06-26 15:27:32 +08:00
|
|
|
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* This file is released under the LGPL.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DM_INTERNAL_H
|
|
|
|
#define DM_INTERNAL_H
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/device-mapper.h>
|
|
|
|
#include <linux/list.h>
|
2016-05-13 04:28:10 +08:00
|
|
|
#include <linux/moduleparam.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/blkdev.h>
|
2015-05-23 05:13:32 +08:00
|
|
|
#include <linux/backing-dev.h>
|
2006-03-27 17:17:54 +08:00
|
|
|
#include <linux/hdreg.h>
|
2014-01-07 12:01:22 +08:00
|
|
|
#include <linux/completion.h>
|
2014-01-14 08:37:54 +08:00
|
|
|
#include <linux/kobject.h>
|
2017-10-20 15:37:38 +08:00
|
|
|
#include <linux/refcount.h>
|
2023-03-28 01:59:25 +08:00
|
|
|
#include <linux/log2.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-08-16 22:54:23 +08:00
|
|
|
#include "dm-stats.h"
|
|
|
|
|
2006-12-08 18:41:04 +08:00
|
|
|
/*
|
|
|
|
* Suspend feature flags
|
|
|
|
*/
|
|
|
|
#define DM_SUSPEND_LOCKFS_FLAG (1 << 0)
|
2006-12-08 18:41:07 +08:00
|
|
|
#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)
|
2006-12-08 18:41:04 +08:00
|
|
|
|
2012-07-27 22:08:16 +08:00
|
|
|
/*
|
|
|
|
* Status feature flags
|
|
|
|
*/
|
|
|
|
#define DM_STATUS_NOFLUSH_FLAG (1 << 0)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* List of devices that a metadevice uses and should open/close.
|
|
|
|
*/
|
2008-10-10 20:37:09 +08:00
|
|
|
struct dm_dev_internal {
|
2005-04-17 06:20:36 +08:00
|
|
|
struct list_head list;
|
2017-10-20 15:37:38 +08:00
|
|
|
refcount_t count;
|
2014-08-14 02:53:43 +08:00
|
|
|
struct dm_dev *dm_dev;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct dm_table;
|
dm: enable request based option
This patch enables request-based dm.
o Request-based dm and bio-based dm coexist, since there are
some target drivers which are more fitting to bio-based dm.
Also, there are other bio-based devices in the kernel
(e.g. md, loop).
Since bio-based device can't receive struct request,
there are some limitations on device stacking between
bio-based and request-based.
                             type of underlying device
                           bio-based      request-based
   ----------------------------------------------
    bio-based                 OK                OK
    request-based             --                OK
The device type is recognized by the queue flag in the kernel,
so dm follows that.
o The type of a dm device is decided at the first table binding time.
Once the type of a dm device is decided, the type can't be changed.
o Mempool allocations are deferred until table loading time, since
mempools for request-based dm are different from those for bio-based
dm and the needed mempool type is fixed by the type of table.
o Currently, request-based dm supports only tables that have a single
target. To support multiple targets, we need to support request
splitting or prevent bio/request from spanning multiple targets.
The former needs lots of changes in the block layer, and the latter
requires all target drivers to support the merge() function.
Both will take time.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:36 +08:00
|
|
|
struct dm_md_mempools;
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
struct dm_target_io;
|
|
|
|
struct dm_io;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2023-01-26 22:48:30 +08:00
|
|
|
/*
|
|
|
|
*---------------------------------------------------------------
|
2006-06-26 15:27:33 +08:00
|
|
|
* Internal table functions.
|
2023-01-26 22:48:30 +08:00
|
|
|
*---------------------------------------------------------------
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
void dm_table_event_callback(struct dm_table *t,
|
|
|
|
void (*fn)(void *), void *context);
|
|
|
|
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
|
2012-09-27 06:45:45 +08:00
|
|
|
bool dm_table_has_no_data_devices(struct dm_table *table);
|
2009-06-22 17:12:34 +08:00
|
|
|
int dm_calculate_queue_limits(struct dm_table *table,
|
|
|
|
struct queue_limits *limits);
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|
|
|
struct queue_limits *limits);
|
2005-04-17 06:20:36 +08:00
|
|
|
struct list_head *dm_table_get_devices(struct dm_table *t);
|
|
|
|
void dm_table_presuspend_targets(struct dm_table *t);
|
2014-10-29 08:13:31 +08:00
|
|
|
void dm_table_presuspend_undo_targets(struct dm_table *t);
|
2005-04-17 06:20:36 +08:00
|
|
|
void dm_table_postsuspend_targets(struct dm_table *t);
|
2006-10-03 16:15:36 +08:00
|
|
|
int dm_table_resume_targets(struct dm_table *t);
|
2017-04-28 01:11:23 +08:00
|
|
|
enum dm_queue_mode dm_table_get_type(struct dm_table *t);
|
2011-11-01 04:19:04 +08:00
|
|
|
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
|
2016-02-01 06:22:27 +08:00
|
|
|
struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
|
2016-02-07 07:38:46 +08:00
|
|
|
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
|
2016-06-23 07:54:53 +08:00
|
|
|
bool dm_table_bio_based(struct dm_table *t);
|
dm: enable request based option
This patch enables request-based dm.
o Request-based dm and bio-based dm coexist, since there are
some target drivers which are more fitting to bio-based dm.
Also, there are other bio-based devices in the kernel
(e.g. md, loop).
Since bio-based device can't receive struct request,
there are some limitations on device stacking between
bio-based and request-based.
                             type of underlying device
                           bio-based      request-based
   ----------------------------------------------
    bio-based                 OK                OK
    request-based             --                OK
The device type is recognized by the queue flag in the kernel,
so dm follows that.
o The type of a dm device is decided at the first table binding time.
Once the type of a dm device is decided, the type can't be changed.
o Mempool allocations are deferred until table loading time, since
mempools for request-based dm are different from those for bio-based
dm and the needed mempool type is fixed by the type of table.
o Currently, request-based dm supports only tables that have a single
target. To support multiple targets, we need to support request
splitting or prevent bio/request from spanning multiple targets.
The former needs lots of changes in the block layer, and the latter
requires all target drivers to support the merge() function.
Both will take time.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:36 +08:00
|
|
|
bool dm_table_request_based(struct dm_table *t);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-08-12 11:14:01 +08:00
|
|
|
void dm_lock_md_type(struct mapped_device *md);
|
|
|
|
void dm_unlock_md_type(struct mapped_device *md);
|
2017-04-28 01:11:23 +08:00
|
|
|
void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
|
|
|
|
enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
|
2011-11-01 04:19:04 +08:00
|
|
|
struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
|
2010-08-12 11:14:01 +08:00
|
|
|
|
2016-02-01 01:05:42 +08:00
|
|
|
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
|
2010-08-12 11:14:02 +08:00
|
|
|
|
2013-08-23 06:21:38 +08:00
|
|
|
/*
 * To check whether the target type is bio-based or not (request-based).
 *
 * Evaluates to true when the target's type provides a map method,
 * i.e. (t)->type->map is not NULL.  @t must be a pointer whose ->type
 * member is valid to dereference.
 */
#define dm_target_bio_based(t) ((t)->type->map != NULL)
|
|
|
|
|
dm: enable request based option
This patch enables request-based dm.
o Request-based dm and bio-based dm coexist, since there are
some target drivers which are more fitting to bio-based dm.
Also, there are other bio-based devices in the kernel
(e.g. md, loop).
Since bio-based device can't receive struct request,
there are some limitations on device stacking between
bio-based and request-based.
                             type of underlying device
                           bio-based      request-based
   ----------------------------------------------
    bio-based                 OK                OK
    request-based             --                OK
The device type is recognized by the queue flag in the kernel,
so dm follows that.
o The type of a dm device is decided at the first table binding time.
Once the type of a dm device is decided, the type can't be changed.
o Mempool allocations are deferred until table loading time, since
mempools for request-based dm are different from those for bio-based
dm and the needed mempool type is fixed by the type of table.
o Currently, request-based dm supports only tables that have a single
target. To support multiple targets, we need to support request
splitting or prevent bio/request from spanning multiple targets.
The former needs lots of changes in the block layer, and the latter
requires all target drivers to support the merge() function.
Both will take time.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:36 +08:00
|
|
|
/*
 * To check whether the target type is request-based or not (bio-based).
 *
 * Evaluates to true when the target's type provides a clone_and_map_rq
 * method, i.e. (t)->type->clone_and_map_rq is not NULL.  @t must be a
 * pointer whose ->type member is valid to dereference.
 */
#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL)
|
dm: enable request based option
This patch enables request-based dm.
o Request-based dm and bio-based dm coexist, since there are
some target drivers which are more fitting to bio-based dm.
Also, there are other bio-based devices in the kernel
(e.g. md, loop).
Since bio-based device can't receive struct request,
there are some limitations on device stacking between
bio-based and request-based.
                             type of underlying device
                           bio-based      request-based
   ----------------------------------------------
    bio-based                 OK                OK
    request-based             --                OK
The device type is recognized by the queue flag in the kernel,
so dm follows that.
o The type of a dm device is decided at the first table binding time.
Once the type of a dm device is decided, the type can't be changed.
o Mempool allocations are deferred until table loading time, since
mempools for request-based dm are different from those for bio-based
dm and the needed mempool type is fixed by the type of table.
o Currently, request-based dm supports only tables that have a single
target. To support multiple targets, we need to support request
splitting or prevent bio/request from spanning multiple targets.
The former needs lots of changes in the block layer, and the latter
requires all target drivers to support the merge() function.
Both will take time.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:36 +08:00
|
|
|
|
2013-08-23 06:21:38 +08:00
|
|
|
/*
 * To check whether the target type is a hybrid (capable of being
 * either request-based or bio-based).
 *
 * True only when both dm_target_bio_based() and
 * dm_target_request_based() hold.  Note that @t is expanded twice, so
 * it must not have side effects.
 */
#define dm_target_hybrid(t) (dm_target_bio_based(t) && dm_target_request_based(t))
|
|
|
|
|
2021-05-26 05:24:56 +08:00
|
|
|
/*
|
|
|
|
* Zoned targets related functions.
|
|
|
|
*/
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
|
|
|
|
void dm_zone_endio(struct dm_io *io, struct bio *clone);
|
2021-05-26 05:24:56 +08:00
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
void dm_cleanup_zoned_dev(struct mapped_device *md);
|
2021-05-26 05:24:56 +08:00
|
|
|
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
|
|
|
unsigned int nr_zones, report_zones_cb cb, void *data);
|
2021-05-26 05:24:58 +08:00
|
|
|
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
/*
 * Zone-aware mapping of a target I/O (CONFIG_BLK_DEV_ZONED builds).
 * The parameter is named tio to match the stub used when the option is
 * disabled; "io" is the name used for struct dm_io arguments.
 */
int dm_zone_map_bio(struct dm_target_io *tio);
|
2021-05-26 05:24:56 +08:00
|
|
|
#else
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
/* No-op stub used when CONFIG_BLK_DEV_ZONED is not defined. */
static inline void dm_cleanup_zoned_dev(struct mapped_device *md)
{
}
|
2021-05-26 05:24:56 +08:00
|
|
|
#define dm_blk_report_zones NULL
|
2021-05-26 05:24:58 +08:00
|
|
|
/*
 * Stub used when CONFIG_BLK_DEV_ZONED is not defined: neither argument
 * is inspected and the answer is always false.
 */
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	return false;
}
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 05:25:00 +08:00
|
|
|
static inline int dm_zone_map_bio(struct dm_target_io *tio)
|
|
|
|
{
|
|
|
|
return DM_MAPIO_KILL;
|
|
|
|
}
|
2021-05-26 05:24:56 +08:00
|
|
|
#endif
|
|
|
|
|
2023-01-26 22:48:30 +08:00
|
|
|
/*
|
|
|
|
*---------------------------------------------------------------
|
2005-04-17 06:20:36 +08:00
|
|
|
* A registry of target types.
|
2023-01-26 22:48:30 +08:00
|
|
|
*---------------------------------------------------------------
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
int dm_target_init(void);
|
|
|
|
void dm_target_exit(void);
|
|
|
|
struct target_type *dm_get_target_type(const char *name);
|
2009-04-03 02:55:28 +08:00
|
|
|
void dm_put_target_type(struct target_type *tt);
|
2005-04-17 06:20:36 +08:00
|
|
|
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
|
|
|
|
void *param), void *param);
|
|
|
|
|
|
|
|
int dm_split_args(int *argc, char ***argvp, char *input);
|
|
|
|
|
2009-12-11 07:52:20 +08:00
|
|
|
/*
|
|
|
|
* Is this mapped_device being deleted?
|
|
|
|
*/
|
|
|
|
int dm_deleting_md(struct mapped_device *md);
|
|
|
|
|
2009-12-11 07:52:26 +08:00
|
|
|
/*
|
|
|
|
* Is this mapped_device suspended?
|
|
|
|
*/
|
|
|
|
int dm_suspended_md(struct mapped_device *md);
|
|
|
|
|
2014-10-29 06:34:52 +08:00
|
|
|
/*
|
|
|
|
* Internal suspend and resume methods.
|
|
|
|
*/
|
|
|
|
int dm_suspended_internally_md(struct mapped_device *md);
|
|
|
|
void dm_internal_suspend_fast(struct mapped_device *md);
|
|
|
|
void dm_internal_resume_fast(struct mapped_device *md);
|
|
|
|
void dm_internal_suspend_noflush(struct mapped_device *md);
|
|
|
|
void dm_internal_resume(struct mapped_device *md);
|
|
|
|
|
2013-11-02 06:27:41 +08:00
|
|
|
/*
|
|
|
|
* Test if the device is scheduled for deferred remove.
|
|
|
|
*/
|
|
|
|
int dm_test_deferred_remove_flag(struct mapped_device *md);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to remove devices marked for deferred removal.
|
|
|
|
*/
|
|
|
|
void dm_deferred_remove(void);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* The device-mapper can be driven through one of two interfaces;
|
|
|
|
* ioctl or filesystem, depending on which patch you have applied.
|
|
|
|
*/
|
|
|
|
int dm_interface_init(void);
|
|
|
|
void dm_interface_exit(void);
|
|
|
|
|
2009-01-06 11:05:12 +08:00
|
|
|
/*
|
|
|
|
* sysfs interface
|
|
|
|
*/
|
|
|
|
int dm_sysfs_init(struct mapped_device *md);
|
|
|
|
void dm_sysfs_exit(struct mapped_device *md);
|
|
|
|
struct kobject *dm_kobject(struct mapped_device *md);
|
|
|
|
struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
|
2014-01-14 08:37:54 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The kobject helper
|
|
|
|
*/
|
|
|
|
void dm_kobject_release(struct kobject *kobj);
|
2009-01-06 11:05:12 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Targets for linear and striped mappings
|
|
|
|
*/
|
|
|
|
int dm_linear_init(void);
|
|
|
|
void dm_linear_exit(void);
|
|
|
|
|
|
|
|
int dm_stripe_init(void);
|
|
|
|
void dm_stripe_exit(void);
|
|
|
|
|
dm: separate device deletion from dm_put
This patch separates the device deletion code from dm_put()
to make sure the deletion happens in the process context.
By this patch, device deletion always occurs in an ioctl (process)
context and dm_put() can be called in interrupt context.
As a result, the request-based dm's bad dm_put() usage pointed out
by Mikulas below disappears.
http://marc.info/?l=dm-devel&m=126699981019735&w=2
Without this patch, I confirmed there is a case to crash the system:
dm_put() => dm_table_destroy() => vfree() => BUG_ON(in_interrupt())
Some more background and details:
In request-based dm, a device opener can remove a mapped_device
while the last request is still completing, because bios in the last
request complete first and then the device opener can close and remove
the mapped_device before the last request completes:
CPU0 CPU1
=================================================================
<<INTERRUPT>>
blk_end_request_all(clone_rq)
blk_update_request(clone_rq)
bio_endio(clone_bio) == end_clone_bio
blk_update_request(orig_rq)
bio_endio(orig_bio)
<<I/O completed>>
dm_blk_close()
dev_remove()
dm_put(md)
<<Free md>>
blk_finish_request(clone_rq)
....
dm_end_request(clone_rq)
free_rq_clone(clone_rq)
blk_end_request_all(orig_rq)
rq_completed(md)
So request-based dm used dm_get()/dm_put() to hold md for each I/O
until its request completion handling is fully done.
However, the final dm_put() can call the device deletion code which
must not be run in interrupt context and may cause kernel panic.
To solve the problem, this patch moves the device deletion code,
dm_destroy(), to predetermined places that actually delete
the mapped_device in ioctl (process) context, and changes dm_put()
to just decrement the reference count of the mapped_device.
By this change, dm_put() can be used in any context and the symmetric
model below is introduced:
dm_create(): create a mapped_device
dm_destroy(): destroy a mapped_device
dm_get(): increment the reference count of a mapped_device
dm_put(): decrement the reference count of a mapped_device
dm_destroy() waits for all references of the mapped_device to disappear,
then deletes the mapped_device.
dm_destroy() uses active waiting with msleep(1), since deleting
the mapped_device isn't a performance-critical task.
And since at this point, nobody opens the mapped_device and no new
reference will be taken, the pending counts are just for racing
completing activity and will eventually decrease to zero.
For the unlikely case of the forced module unload, dm_destroy_immediate(),
which doesn't wait and forcibly deletes the mapped_device, is also
introduced and used in dm_hash_remove_all(). Otherwise, "rmmod -f"
may be stuck and never return.
And now, because the mapped_device is deleted at this point, subsequent
accesses to the mapped_device may cause NULL pointer references.
Cc: stable@kernel.org
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2010-08-12 11:13:56 +08:00
|
|
|
/*
|
|
|
|
* mapped_device operations
|
|
|
|
*/
|
|
|
|
void dm_destroy(struct mapped_device *md);
|
|
|
|
void dm_destroy_immediate(struct mapped_device *md);
|
2006-06-26 15:27:34 +08:00
|
|
|
int dm_open_count(struct mapped_device *md);
|
2013-11-02 06:27:41 +08:00
|
|
|
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred);
|
|
|
|
int dm_cancel_deferred_remove(struct mapped_device *md);
|
2013-08-16 22:54:23 +08:00
|
|
|
int dm_request_based(struct mapped_device *md);
|
2023-06-08 19:02:55 +08:00
|
|
|
int dm_get_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode,
|
2014-08-14 02:53:43 +08:00
|
|
|
struct dm_dev **result);
|
|
|
|
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-03-06 10:32:31 +08:00
|
|
|
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
|
2023-01-26 04:14:58 +08:00
|
|
|
unsigned int cookie, bool need_resize_uevent);
|
2007-12-13 22:15:57 +08:00
|
|
|
|
2009-12-11 07:51:57 +08:00
|
|
|
int dm_io_init(void);
|
|
|
|
void dm_io_exit(void);
|
|
|
|
|
2008-04-25 04:43:49 +08:00
|
|
|
int dm_kcopyd_init(void);
|
|
|
|
void dm_kcopyd_exit(void);
|
|
|
|
|
dm: enable request based option
This patch enables request-based dm.
o Request-based dm and bio-based dm coexist, since there are
some target drivers which are more fitting to bio-based dm.
Also, there are other bio-based devices in the kernel
(e.g. md, loop).
Since bio-based device can't receive struct request,
there are some limitations on device stacking between
bio-based and request-based.
type of underlying device
bio-based request-based
----------------------------------------------
bio-based OK OK
request-based -- OK
The device type is recognized by the queue flag in the kernel,
so dm follows that.
o The type of a dm device is decided at the first table binding time.
Once the type of a dm device is decided, the type can't be changed.
o Mempool allocations are deferred to table loading time, since
mempools for request-based dm are different from those for bio-based
dm and the needed mempool type is fixed by the type of table.
o Currently, request-based dm supports only tables that have a single
target. To support multiple targets, we need to support request
splitting or prevent bio/request from spanning multiple targets.
The former needs lots of changes in the block layer, and the latter
requires all target drivers to support the merge() function.
Both will take time.
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:36 +08:00
|
|
|
/*
|
|
|
|
* Mempool operations
|
|
|
|
*/
|
|
|
|
void dm_free_md_mempools(struct dm_md_mempools *pools);
|
|
|
|
|
2013-08-16 22:54:23 +08:00
|
|
|
/*
|
2016-05-13 04:28:10 +08:00
|
|
|
* Various helpers
|
2013-08-16 22:54:23 +08:00
|
|
|
*/
|
2023-01-26 04:14:58 +08:00
|
|
|
unsigned int dm_get_reserved_bio_based_ios(void);
|
2015-02-26 13:50:28 +08:00
|
|
|
|
2023-03-28 01:59:25 +08:00
|
|
|
#define DM_HASH_LOCKS_MAX 64
|
|
|
|
|
|
|
|
static inline unsigned int dm_num_hash_locks(void)
|
|
|
|
{
|
2023-03-29 22:29:34 +08:00
|
|
|
unsigned int num_locks = roundup_pow_of_two(num_online_cpus()) << 1;
|
2023-03-28 01:59:25 +08:00
|
|
|
|
|
|
|
return min_t(unsigned int, num_locks, DM_HASH_LOCKS_MAX);
|
|
|
|
}
|
|
|
|
|
2023-03-29 22:29:34 +08:00
|
|
|
#define DM_HASH_LOCKS_MULT 4294967291ULL
|
|
|
|
#define DM_HASH_LOCKS_SHIFT 6
|
|
|
|
|
|
|
|
/*
 * Map @block to a lock slot.
 *
 * The block number is multiplied by DM_HASH_LOCKS_MULT, shifted right by
 * DM_HASH_LOCKS_SHIFT, and then xor-folded with a further right-shifted
 * copy of itself.  The result is reduced with the mask (@num_locks - 1),
 * so @num_locks is expected to be a power of two for the mask to cover
 * every slot.
 */
static inline unsigned int dm_hash_locks_index(sector_t block,
					       unsigned int num_locks)
{
	sector_t hash = (block * DM_HASH_LOCKS_MULT) >> DM_HASH_LOCKS_SHIFT;

	/* Fold the higher bits down into the low bits kept by the mask. */
	hash ^= hash >> DM_HASH_LOCKS_SHIFT;

	return hash & (num_locks - 1);
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|