From 8cfa7b0f67b4d899efc7f39eb7e172fd79237811 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 27 Jul 2011 11:00:36 +1000 Subject: [PATCH] md/raid5: Avoid BUG caused by multiple failures. While preparing to write a stripe we keep the parity block or blocks locked (R5_LOCKED) - towards the end of schedule_reconstruction. If the array is discovered to have failed before this write completes we can leave those blocks LOCKED, and init_stripe will notice that a free stripe still has a locked block and will complain. So clear the R5_LOCKED flag in handle_failed_stripe, and demote the 'BUG' to a 'WARN_ON'. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a81eca6434dd..b874f42694e2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -340,7 +340,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) (unsigned long long)sh->sector, i, dev->toread, dev->read, dev->towrite, dev->written, test_bit(R5_LOCKED, &dev->flags)); - BUG(); + WARN_ON(1); } dev->flags = 0; raid5_build_block(sh, i, previous); @@ -2301,6 +2301,10 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, if (bitmap_end) bitmap_endwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0, 0); + /* If we were in the middle of a write the parity block might + * still be locked - so just clear all R5_LOCKED flags + */ + clear_bit(R5_LOCKED, &sh->dev[i].flags); } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))