dm raid: add raid level takeover support
Add raid level takeover support allowing arbitrary takeovers between raid levels supported by md personalities (i.e. raid0, raid1/10 and raid4/5/6):

- add rs_config_{backup|restore} functions to allow for temporarily storing ctr-requested layout changes and restoring them for the takeover conversion decision after the superblocks have been loaded and analyzed

- add members to store the layout to 'struct raid_set' (not mandatory for takeover but needed for reshape in a later patch)

- add a rebuild_disks bitfield to 'struct raid_set' and set its bits in the ctr to use them in setting up takeover (the base for addressing a 'rebuild'-related raid_status() table line bug, and needed as well for reshape in a future patch)

- add runtime flags and respective manipulation functions to be able to control e.g. the writing of superblocks from the preresume function on takeover and (later) reshape

- add functions to detect a takeover, check that it is valid (mandatory here to avoid failing on md_run()), set it up and use it in the ctr; these will likely be moved out once reshaping gets added, to simplify the ctr

- start the raid set read-only in the ctr and switch to read-write, optionally updating superblocks, in preresume in order to allow suspend to quiesce any active table before (which involves superblock updates); this ensures the proper sequence of writing the current and any new takeover(/reshape) metadata

Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in: parent 7b34df74d2, commit ecbfb9f118
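The rs_config_{backup|restore} pair described above exists because superblock analysis overwrites the mddev's new_level/new_layout/new_chunk_sectors with on-disk values, which would lose what the constructor just parsed. A minimal standalone sketch of that dance (field names mirror the patch; the userspace flow itself is illustrative, not the kernel code path):

#include <stdio.h>

/* mirrors struct rs_layout from the patch */
struct rs_layout { int new_level, new_layout, new_chunk_sectors; };

int main(void)
{
        struct rs_layout mddev = { 5, 2, 128 }; /* ctr-requested layout ("raid5") */
        struct rs_layout backup = mddev;        /* rs_config_backup() before analysis */
        int sb_level = 1;                       /* level recorded in the superblocks */

        mddev.new_level = sb_level;             /* superblock load overwrites the request */
        mddev = backup;                         /* rs_config_restore() after analysis */

        /* rs_takeover_requested(): restored request differs from on-disk level */
        printf("takeover requested: %s\n", mddev.new_level != sb_level ? "yes" : "no");
        return 0;
}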
@@ -98,6 +98,13 @@ struct raid_dev {
 #define ALL_CTR_FLAGS		(CTR_FLAG_OPTIONS_NO_ARGS | \
 				 CTR_FLAG_OPTIONS_ONE_ARG)
 
+/*
+ * All flags which cause a recovery unfreeze once they got stored in the raid metadata
+ */
+#define ALL_FREEZE_FLAGS	(ALL_CTR_FLAGS & ~(CTR_FLAG_REGION_SIZE | CTR_FLAGS_ANY_SYNC | \
+					   CTR_FLAG_RAID10_FORMAT | CTR_FLAG_RAID10_COPIES | \
+					   CTR_FLAG_RAID10_USE_NEAR_SETS))
+
 /* Invalid options definitions per raid level... */
 
 /* "raid0" does not accept any options */
@@ -129,14 +136,39 @@ struct raid_dev {
 #define RAID6_INVALID_FLAGS	(CTR_FLAG_NOSYNC | RAID45_INVALID_FLAGS)
 /* ...invalid options definitions per raid level */
 
+/*
+ * Flags for rs->runtime_flags field
+ * (RT_FLAG prefix meaning "runtime flag")
+ *
+ * These are all internal and used to define runtime state,
+ * e.g. to prevent another resume from preresume processing
+ * the raid set all over again.
+ */
+#define RT_FLAG_RS_PRERESUMED		0x1
+#define RT_FLAG_RS_RESUMED		0x2
+#define RT_FLAG_RS_BITMAP_LOADED	0x4
+#define RT_FLAG_UPDATE_SBS		0x8
+
 /* Array elements of 64 bit needed for rebuild/write_mostly bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
 
+/*
+ * raid set level, layout and chunk sectors backup/restore
+ */
+struct rs_layout {
+	int new_level;
+	int new_layout;
+	int new_chunk_sectors;
+};
+
 struct raid_set {
 	struct dm_target *ti;
 
 	uint32_t bitmap_loaded;
 	uint32_t ctr_flags;
+	uint32_t runtime_flags;
+
+	uint64_t rebuild_disks[DISKS_ARRAY_ELEMS];
 
 	int raid_disks;
 	int delta_disks;
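A quick standalone check of the DISKS_ARRAY_ELEMS arithmetic above, which sizes rebuild_disks at one bit per possible device, rounded up to whole 64-bit words. MAX_RAID_DEVICES = 253 is assumed here purely for illustration:

#include <assert.h>
#include <stdint.h>

#define MAX_RAID_DEVICES 253	/* assumed value for this sketch */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)

int main(void)
{
	/* (253 + 63) / 8 / 8 == 4: four 64-bit words hold 253 one-bit slots */
	assert(DISKS_ARRAY_ELEMS == 4);
	return 0;
}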
@@ -146,10 +178,41 @@ struct raid_set {
 	struct mddev md;
 	struct raid_type *raid_type;
 	struct dm_target_callbacks callbacks;
+	struct rs_layout rs_layout;
+
 	struct raid_dev dev[0];
 };
 
+/* Backup/restore raid set configuration helpers */
+static void _rs_config_backup(struct raid_set *rs, struct rs_layout *l)
+{
+	struct mddev *mddev = &rs->md;
+
+	l->new_level = mddev->new_level;
+	l->new_layout = mddev->new_layout;
+	l->new_chunk_sectors = mddev->new_chunk_sectors;
+}
+
+static void rs_config_backup(struct raid_set *rs)
+{
+	return _rs_config_backup(rs, &rs->rs_layout);
+}
+
+static void _rs_config_restore(struct raid_set *rs, struct rs_layout *l)
+{
+	struct mddev *mddev = &rs->md;
+
+	mddev->new_level = l->new_level;
+	mddev->new_layout = l->new_layout;
+	mddev->new_chunk_sectors = l->new_chunk_sectors;
+}
+
+static void rs_config_restore(struct raid_set *rs)
+{
+	return _rs_config_restore(rs, &rs->rs_layout);
+}
+/* END: backup/restore raid set configuration helpers */
+
 /* raid10 algorithms (i.e. formats) */
 #define ALGORITHM_RAID10_DEFAULT	0
 #define ALGORITHM_RAID10_NEAR		1
@@ -201,6 +264,13 @@ static void _set_flag(uint32_t flag, uint32_t *flags)
 	*flags |= flag;
 }
 
+/* Clear single @flag in @flags */
+static void _clear_flag(uint32_t flag, uint32_t *flags)
+{
+	WARN_ON_ONCE(hweight32(flag) != 1);
+	*flags &= ~flag;
+}
+
 /* Test single @flag in @flags */
 static bool _test_flag(uint32_t flag, uint32_t flags)
 {
@@ -229,6 +299,17 @@ static bool _test_and_set_flag(uint32_t flag, uint32_t *flags)
 	_set_flag(flag, flags);
 	return false;
 }
 
+/* Return true if single @flag is set in @*flags and clear it, else return false */
+static bool _test_and_clear_flag(uint32_t flag, uint32_t *flags)
+{
+	if (_test_flag(flag, *flags)) {
+		_clear_flag(flag, flags);
+		return true;
+	}
+
+	return false;
+}
+
 /* ...ctr and runtime flag bit manipulation */
 
 /* All table line arguments are defined here */
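Taken together, these helpers give a one-shot gating idiom: _test_and_clear_flag() returns true exactly once per time the flag is set, which is how preresume later consumes RT_FLAG_UPDATE_SBS. A userspace sketch (helper bodies mirror the patch, minus WARN_ON_ONCE(); the main() harness is hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RT_FLAG_UPDATE_SBS 0x8

static bool _test_flag(uint32_t flag, uint32_t flags)
{
	return (flags & flag) ? true : false;
}

static void _clear_flag(uint32_t flag, uint32_t *flags)
{
	*flags &= ~flag;
}

static bool _test_and_clear_flag(uint32_t flag, uint32_t *flags)
{
	if (_test_flag(flag, *flags)) {
		_clear_flag(flag, flags);
		return true;
	}

	return false;
}

int main(void)
{
	uint32_t runtime_flags = RT_FLAG_UPDATE_SBS;

	/* first caller consumes the flag and performs the update... */
	printf("%d\n", _test_and_clear_flag(RT_FLAG_UPDATE_SBS, &runtime_flags)); /* 1 */
	/* ...any later caller sees it already consumed */
	printf("%d\n", _test_and_clear_flag(RT_FLAG_UPDATE_SBS, &runtime_flags)); /* 0 */
	return 0;
}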
@@ -576,7 +657,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
 	rs->md.layout = raid_type->algorithm;
 	rs->md.new_layout = rs->md.layout;
 	rs->md.delta_disks = 0;
-	rs->md.recovery_cp = 0;
+	rs->md.recovery_cp = rs_is_raid0(rs) ? MaxSector : 0;
 
 	for (i = 0; i < raid_devs; i++)
 		md_rdev_init(&rs->dev[i].rdev);
@@ -1007,9 +1088,12 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
 			 * indexes of replaced devices and to set up additional
 			 * devices on raid level takeover.
 			 */
-			if (!_in_range(value, 0, rs->md.raid_disks - 1))
+			if (!_in_range(value, 0, rs->raid_disks - 1))
 				return ti_error_einval(rs->ti, "Invalid rebuild index given");
+
+			if (test_and_set_bit(value, (void *) rs->rebuild_disks))
+				return ti_error_einval(rs->ti, "rebuild for this index already given");
+
 			rd = rs->dev + value;
 			clear_bit(In_sync, &rd->rdev.flags);
 			clear_bit(Faulty, &rd->rdev.flags);
@@ -1175,8 +1259,166 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 	return mddev_congested(&rs->md, bits);
 }
 
+/*
+ * Make sure a valid takeover (level switch) is being requested on @rs
+ *
+ * Conversions of raid sets from one MD personality to another
+ * have to conform to restrictions which are enforced here.
+ *
+ * Degradation is already checked for in rs_check_conversion() below.
+ */
+static int rs_check_takeover(struct raid_set *rs)
+{
+	struct mddev *mddev = &rs->md;
+	unsigned int near_copies;
+
+	switch (mddev->level) {
+	case 0:
+		/* raid0 -> raid1/5 with one disk */
+		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
+		    mddev->raid_disks == 1)
+			return 0;
+
+		/* raid0 -> raid10 */
+		if (mddev->new_level == 10 &&
+		    !(rs->raid_disks % 2))
+			return 0;
+
+		/* raid0 with multiple disks -> raid4/5/6 */
+		if (_in_range(mddev->new_level, 4, 6) &&
+		    mddev->new_layout == ALGORITHM_PARITY_N &&
+		    mddev->raid_disks > 1)
+			return 0;
+
+		break;
+
+	case 10:
+		/* Can't takeover raid10_offset! */
+		if (_is_raid10_offset(mddev->layout))
+			break;
+
+		near_copies = _raid10_near_copies(mddev->layout);
+
+		/* raid10* -> raid0 */
+		if (mddev->new_level == 0) {
+			/* Can takeover raid10_near with raid disks divisible by data copies! */
+			if (near_copies > 1 &&
+			    !(mddev->raid_disks % near_copies)) {
+				mddev->raid_disks /= near_copies;
+				mddev->delta_disks = mddev->raid_disks;
+				return 0;
+			}
+
+			/* Can takeover raid10_far */
+			if (near_copies == 1 &&
+			    _raid10_far_copies(mddev->layout) > 1)
+				return 0;
+
+			break;
+		}
+
+		/* raid10_{near,far} -> raid1 */
+		if (mddev->new_level == 1 &&
+		    max(near_copies, _raid10_far_copies(mddev->layout)) == mddev->raid_disks)
+			return 0;
+
+		/* raid10_{near,far} with 2 disks -> raid4/5 */
+		if (_in_range(mddev->new_level, 4, 5) &&
+		    mddev->raid_disks == 2)
+			return 0;
+		break;
+
+	case 1:
+		/* raid1 with 2 disks -> raid4/5 */
+		if (_in_range(mddev->new_level, 4, 5) &&
+		    mddev->raid_disks == 2) {
+			mddev->degraded = 1;
+			return 0;
+		}
+
+		/* raid1 -> raid0 */
+		if (mddev->new_level == 0 &&
+		    mddev->raid_disks == 1)
+			return 0;
+
+		/* raid1 -> raid10 */
+		if (mddev->new_level == 10)
+			return 0;
+
+		break;
+
+	case 4:
+		/* raid4 -> raid0 */
+		if (mddev->new_level == 0)
+			return 0;
+
+		/* raid4 -> raid1/5 with 2 disks */
+		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
+		    mddev->raid_disks == 2)
+			return 0;
+
+		/* raid4 -> raid5/6 with parity N */
+		if (_in_range(mddev->new_level, 5, 6) &&
+		    mddev->layout == ALGORITHM_PARITY_N)
+			return 0;
+		break;
+
+	case 5:
+		/* raid5 with parity N -> raid0 */
+		if (mddev->new_level == 0 &&
+		    mddev->layout == ALGORITHM_PARITY_N)
+			return 0;
+
+		/* raid5 with parity N -> raid4 */
+		if (mddev->new_level == 4 &&
+		    mddev->layout == ALGORITHM_PARITY_N)
+			return 0;
+
+		/* raid5 with 2 disks -> raid1/4/10 */
+		if ((mddev->new_level == 1 || mddev->new_level == 4 || mddev->new_level == 10) &&
+		    mddev->raid_disks == 2)
+			return 0;
+
+		/* raid5 with parity N -> raid6 with parity N */
+		if (mddev->new_level == 6 &&
+		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
+		     _in_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC_6, ALGORITHM_RIGHT_SYMMETRIC_6)))
+			return 0;
+		break;
+
+	case 6:
+		/* raid6 with parity N -> raid0 */
+		if (mddev->new_level == 0 &&
+		    mddev->layout == ALGORITHM_PARITY_N)
+			return 0;
+
+		/* raid6 with parity N -> raid4 */
+		if (mddev->new_level == 4 &&
+		    mddev->layout == ALGORITHM_PARITY_N)
+			return 0;
+
+		/* raid6_*_n with parity N -> raid5_* */
+		if (mddev->new_level == 5 &&
+		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
+		     _in_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC, ALGORITHM_RIGHT_SYMMETRIC)))
+			return 0;
+
+	default:
+		break;
+	}
+
+	return ti_error_einval(rs->ti, "takeover not possible");
+}
+
+/* True if @rs requested to be taken over */
+static bool rs_takeover_requested(struct raid_set *rs)
+{
+	return rs->md.new_level != rs->md.level;
+}
+
 /* Features */
-#define	FEATURE_FLAG_SUPPORTS_RESHAPE	0x1
+#define	FEATURE_FLAG_SUPPORTS_V180	0x1	/* Supports v1.8.0 extended superblock */
+#define	FEATURE_FLAG_SUPPORTS_RESHAPE	0x2	/* Supports v1.8.0 reshaping functionality */
 
 /* State flags for sb->flags */
 #define	SB_FLAG_RESHAPE_ACTIVE		0x1
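The raid10_near -> raid0 branch above is the only takeover that rewrites the disk count in place; a worked example of its arithmetic with assumed values (4 disks, 2 near copies):

#include <assert.h>

int main(void)
{
	int raid_disks = 4;	/* raid10_near set, illustrative */
	int near_copies = 2;	/* two data copies */

	/* takeover allowed because the disks divide evenly by the copies... */
	assert(raid_disks % near_copies == 0);

	/* ...and the resulting raid0 keeps one disk per group of copies */
	raid_disks /= near_copies;
	assert(raid_disks == 2);
	return 0;
}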
@@ -1220,7 +1462,7 @@ struct dm_raid_superblock {
 	/********************************************************************
 	 * BELOW FOLLOW V1.8.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
 	 *
-	 * FEATURE_FLAG_SUPPORTS_RESHAPE in the features member indicates that those exist
+	 * FEATURE_FLAG_SUPPORTS_V180 in the features member indicates that those exist
 	 */
 
 	__le32 flags; /* Flags defining array states for reshaping */
@@ -1287,7 +1529,7 @@ static void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *
 	failed_devices[0] = le64_to_cpu(sb->failed_devices);
 	memset(failed_devices + 1, 0, sizeof(sb->extended_failed_devices));
 
-	if (_test_flag(FEATURE_FLAG_SUPPORTS_RESHAPE, le32_to_cpu(sb->compat_features))) {
+	if (_test_flag(FEATURE_FLAG_SUPPORTS_V180, le32_to_cpu(sb->compat_features))) {
 		int i = ARRAY_SIZE(sb->extended_failed_devices);
 
 		while (i--)
@@ -1337,7 +1579,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb_update_failed_devices(sb, failed_devices);
 
 	sb->magic = cpu_to_le32(DM_RAID_MAGIC);
-	sb->compat_features = cpu_to_le32(0); /* Don't set reshape flag yet */
+	sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V180); /* Don't set reshape flag yet */
 
 	sb->num_devices = cpu_to_le32(mddev->raid_disks);
 	sb->array_position = cpu_to_le32(rdev->raid_disk);
@@ -1416,6 +1658,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
 		super_sync(rdev->mddev, rdev);
 
 		set_bit(FirstUse, &rdev->flags);
+		sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V180); /* Don't set reshape flag yet */
 
 		/* Force writing of superblocks to disk */
 		set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
@@ -1461,7 +1704,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 	 * Reshaping is supported, e.g. reshape_position is valid
 	 * in superblock and superblock content is authoritative.
 	 */
-	if (_test_flag(FEATURE_FLAG_SUPPORTS_RESHAPE, le32_to_cpu(sb->compat_features))) {
+	if (_test_flag(FEATURE_FLAG_SUPPORTS_V180, le32_to_cpu(sb->compat_features))) {
 		/* Superblock is authoritative wrt given raid set layout! */
 		mddev->raid_disks = le32_to_cpu(sb->num_devices);
 		mddev->level = le32_to_cpu(sb->level);
@@ -1564,6 +1807,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 		if (new_devs == rs->raid_disks) {
 			DMINFO("Superblocks created for new raid set");
 			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+			_set_flag(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
 			mddev->recovery_cp = 0;
 		} else if (new_devs && new_devs != rs->raid_disks && !rebuilds) {
 			DMERR("New device injected into existing raid set without "
@@ -1657,8 +1901,9 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 	if (!mddev->events && super_init_validation(rs, rdev))
 		return -EINVAL;
 
-	if (sb->compat_features || sb->incompat_features) {
-		rs->ti->error = "Unable to assemble array: No feature flags supported yet";
+	if (le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V180 ||
+	    sb->incompat_features) {
+		rs->ti->error = "Unable to assemble array: No incompatible feature flags supported yet";
 		return -EINVAL;
 	}
 
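The validation above pins the superblock to exactly the one compat feature this version writes and rejects any incompat bit. A standalone sketch of that gating, with le32_to_cpu() treated as identity (i.e. a little-endian host is assumed):

#include <stdint.h>
#include <stdio.h>

#define FEATURE_FLAG_SUPPORTS_V180 0x1

/* returns 0 on success, -1 standing in for -EINVAL */
static int validate_features(uint32_t compat_features, uint32_t incompat_features)
{
	if (compat_features != FEATURE_FLAG_SUPPORTS_V180 || incompat_features)
		return -1;

	return 0;
}

int main(void)
{
	printf("%d\n", validate_features(FEATURE_FLAG_SUPPORTS_V180, 0));   /*  0: assembles */
	printf("%d\n", validate_features(0x3, 0));                          /* -1: unknown compat bit */
	printf("%d\n", validate_features(FEATURE_FLAG_SUPPORTS_V180, 0x1)); /* -1: incompat bit set */
	return 0;
}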
@@ -1718,8 +1963,6 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 		 * that the "sync" directive is disallowed during the
 		 * reshape.
 		 */
-		rdev->sectors = to_sector(i_size_read(rdev->bdev->bd_inode));
-
 		if (_test_flag(CTR_FLAG_SYNC, rs->ctr_flags))
 			continue;
 
@@ -1785,14 +2028,77 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 	return 0;
 }
 
+/* Userspace reordered disks -> adjust raid_disk indexes in @rs */
+static void _reorder_raid_disk_indexes(struct raid_set *rs)
+{
+	int i = 0;
+	struct md_rdev *rdev;
+
+	rdev_for_each(rdev, &rs->md) {
+		rdev->raid_disk = i++;
+		rdev->saved_raid_disk = rdev->new_raid_disk = -1;
+	}
+}
+
+/*
+ * Setup @rs for takeover by a different raid level
+ */
+static int rs_setup_takeover(struct raid_set *rs)
+{
+	struct mddev *mddev = &rs->md;
+	struct md_rdev *rdev;
+	unsigned int d = mddev->raid_disks = rs->raid_disks;
+	sector_t new_data_offset = rs->dev[0].rdev.data_offset ? 0 : rs->data_offset;
+
+	if (rt_is_raid10(rs->raid_type)) {
+		if (mddev->level == 0) {
+			/* Userspace reordered disks -> adjust raid_disk indexes */
+			_reorder_raid_disk_indexes(rs);
+
+			/* raid0 -> raid10_far layout */
+			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_FAR,
+								   rs->raid10_copies);
+		} else if (mddev->level == 1)
+			/* raid1 -> raid10_near layout */
+			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
+								   rs->raid_disks);
+		else
+			return -EINVAL;
+
+	}
+
+	clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+	mddev->recovery_cp = MaxSector;
+
+	while (d--) {
+		rdev = &rs->dev[d].rdev;
+
+		if (test_bit(d, (void *) rs->rebuild_disks)) {
+			clear_bit(In_sync, &rdev->flags);
+			clear_bit(Faulty, &rdev->flags);
+			mddev->recovery_cp = rdev->recovery_offset = 0;
+			/* Bitmap has to be created when we do an "up" takeover */
+			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+		}
+
+		rdev->new_data_offset = new_data_offset;
+	}
+
+	rs_set_new(rs);
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+
+	return 0;
+}
+
 /*
  * Enable/disable discard support on RAID set depending on
  * RAID level and discard properties of underlying RAID members.
  */
-static void configure_discard_support(struct dm_target *ti, struct raid_set *rs)
+static void configure_discard_support(struct raid_set *rs)
 {
 	int i;
 	bool raid456;
+	struct dm_target *ti = rs->ti;
 
 	/* Assume discards not supported until after checks below. */
 	ti->discards_supported = false;
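rs_setup_takeover() walks the rebuild_disks bitfield with the kernel's test_bit(); a userspace sketch of the same walk, with a local test_bit() stand-in and assumed values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DISKS_ARRAY_ELEMS 4	/* enough 64-bit words for this sketch */

/* local stand-in for the kernel helper */
static bool test_bit(unsigned int nr, const uint64_t *addr)
{
	return (addr[nr / 64] >> (nr % 64)) & 1;
}

int main(void)
{
	uint64_t rebuild_disks[DISKS_ARRAY_ELEMS] = { 0 };
	unsigned int d, raid_disks = 5;

	rebuild_disks[0] = 1ull << 3;	/* "rebuild 3" given on the table line */

	for (d = 0; d < raid_disks; d++)
		if (test_bit(d, rebuild_disks))
			printf("disk %u: clear In_sync/Faulty, recover from sector 0\n", d);
	return 0;
}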
@@ -1894,6 +2200,14 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto bad;
 
 	rs->md.sync_super = super_sync;
+
+	/*
+	 * Backup any new raid set level, layout, ...
+	 * requested to be able to compare to superblock
+	 * members for conversion decisions.
+	 */
+	rs_config_backup(rs);
+
 	r = analyse_superblocks(ti, rs);
 	if (r)
 		goto bad;
@@ -1902,10 +2216,29 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->private = rs;
 	ti->num_flush_bios = 1;
 
+	/* Restore any requested new layout for conversion decision */
+	rs_config_restore(rs);
+
 	/*
-	 * Disable/enable discard support on RAID set.
+	 * If a takeover is needed, just set the level to
+	 * the new requested one and allow the raid set to run.
 	 */
-	configure_discard_support(ti, rs);
+	if (rs_takeover_requested(rs)) {
+		r = rs_check_takeover(rs);
+		if (r)
+			return r;
+
+		r = rs_setup_takeover(rs);
+		if (r)
+			return r;
+
+		_set_flag(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+	}
+
+	/* Start raid set read-only and assumed clean to change in raid_resume() */
+	rs->md.ro = 1;
+	rs->md.in_sync = 1;
+	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
+
 	/* Has to be held on running the array */
 	mddev_lock_nointr(&rs->md);
@@ -2312,29 +2645,92 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 	}
 }
 
+/* Load the dirty region bitmap */
+static int _bitmap_load(struct raid_set *rs)
+{
+	int r = 0;
+
+	/* Try loading the bitmap unless "raid0", which does not have one */
+	if (!rs_is_raid0(rs) &&
+	    !_test_and_set_flag(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
+		r = bitmap_load(&rs->md);
+		if (r)
+			DMERR("Failed to load bitmap");
+	}
+
+	return r;
+}
+
+static int raid_preresume(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+	struct mddev *mddev = &rs->md;
+
+	/* This is a resume after a suspend of the set -> it's already started */
+	if (_test_and_set_flag(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
+		return 0;
+
+	/*
+	 * The superblocks need to be updated on disk if the
+	 * array is new or _bitmap_load will overwrite them
+	 * in core with old data.
+	 *
+	 * In case the array got modified (takeover/reshape/resize)
+	 * or the data offsets on the component devices changed, they
+	 * have to be updated as well.
+	 *
+	 * Have to switch to readwrite and back in order to
+	 * allow for the superblock updates.
+	 */
+	if (_test_and_clear_flag(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) {
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		mddev->ro = 0;
+		md_update_sb(mddev, 1);
+		mddev->ro = 1;
+	}
+
+	/*
+	 * Disable/enable discard support on raid set after any
+	 * conversion, because devices can have been added
+	 */
+	configure_discard_support(rs);
+
+	/* Load the bitmap from disk unless raid0 */
+	return _bitmap_load(rs);
+}
+
 static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
+	struct mddev *mddev = &rs->md;
 
-	if (!rt_is_raid0(rs->raid_type)) {
-		set_bit(MD_CHANGE_DEVS, &rs->md.flags);
-
-		if (!rs->bitmap_loaded) {
-			bitmap_load(&rs->md);
-			rs->bitmap_loaded = 1;
-		} else {
-			/*
-			 * A secondary resume while the device is active.
-			 * Take this opportunity to check whether any failed
-			 * devices are reachable again.
-			 */
-			attempt_restore_of_faulty_devices(rs);
-		}
-
-		clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
+	if (_test_and_set_flag(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
+		/*
+		 * A secondary resume while the device is active.
+		 * Take this opportunity to check whether any failed
+		 * devices are reachable again.
+		 */
+		attempt_restore_of_faulty_devices(rs);
+	} else {
+		mddev->in_sync = 0;
+
+		/*
+		 * If any of the constructor flags got passed in
+		 * but "region_size" (gets always passed in for
+		 * mappings with bitmap), we expect userspace to
+		 * reset them and reload the mapping anyway.
+		 *
+		 * -> don't unfreeze resynchronization until imminent
+		 *    reload of the table w/o these flags
+		 */
+		if (!_test_flags(ALL_FREEZE_FLAGS, rs->ctr_flags))
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
 
-	mddev_resume(&rs->md);
+	mddev->ro = 0;
+	if (mddev->suspended)
+		mddev_resume(mddev);
 }
 
 static struct target_type raid_target = {
@@ -2350,6 +2746,7 @@ static struct target_type raid_target = {
 	.io_hints = raid_io_hints,
 	.presuspend = raid_presuspend,
 	.postsuspend = raid_postsuspend,
+	.preresume = raid_preresume,
 	.resume = raid_resume,
 };