[PATCH] md: restart a (raid5) reshape that has been aborted due to a read/write error

An error always aborts any resync/recovery/reshape on the understanding that
it will immediately be restarted if that still makes sense.  However a reshape
currently doesn't get restarted.  With this patch it does.

To avoid restarting when it is not possible to do work, we call into the
personality to check that a reshape is ok, and strengthen raid5_check_reshape
to fail if there are too many failed devices.

We also break some code out into a separate function: remove_and_add_spares as
the indent level for that code was getting crazy.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
NeilBrown 2007-02-28 20:11:48 -08:00 committed by Linus Torvalds
parent d1b5380c7f
commit b4c4c7b809
2 changed files with 47 additions and 29 deletions

View File

@ -5357,6 +5357,44 @@ void md_do_sync(mddev_t *mddev)
EXPORT_SYMBOL_GPL(md_do_sync);
static int remove_and_add_spares(mddev_t *mddev)
{
mdk_rdev_t *rdev;
struct list_head *rtmp;
int spares = 0;
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk >= 0 &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
mddev, rdev->raid_disk)==0) {
char nm[20];
sprintf(nm,"rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
rdev->raid_disk = -1;
}
}
if (mddev->degraded) {
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
if (mddev->pers->hot_add_disk(mddev,rdev)) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm);
spares++;
md_new_event(mddev);
} else
break;
}
}
return spares;
}
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@ -5474,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev)
* Spare are also removed and re-added, to allow
* the personality to fail the re-add.
*/
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk >= 0 &&
(test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
char nm[20];
sprintf(nm,"rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
rdev->raid_disk = -1;
}
}
if (mddev->degraded) {
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
if (mddev->pers->hot_add_disk(mddev,rdev)) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
spares++;
md_new_event(mddev);
} else
break;
}
}
if (spares) {
if (mddev->reshape_position != MaxSector) {
if (mddev->pers->check_reshape(mddev) != 0)
/* Cannot proceed */
goto unlock;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
} else if ((spares = remove_and_add_spares(mddev))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {

View File

@ -3814,6 +3814,8 @@ static int raid5_check_reshape(mddev_t *mddev)
if (err)
return err;
if (mddev->degraded > conf->max_degraded)
return -EINVAL;
/* looks like we might be able to manage this */
return 0;
}