mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/kdave/btrfs-progs.git
synced 2024-12-11 21:23:26 +08:00
btrfs-progs: allow read_data_from_disk() to rebuild RAID56 using P/Q
This new ability is added by: - Allow btrfs_map_block() to return the chunk type. This makes later work much easier. - Only reset the stripe offset inside btrfs_map_block() when needed. Currently, if @raid_map is not NULL, btrfs_map_block() will consider the call to be a WRITE and will reset the stripe offset. This is no longer the case, as for a RAID56 read with mirror_num 1/0 we will still call btrfs_map_block() with a non-NULL raid_map. Add a small check to make sure we won't reset the stripe offset for a mirror 1/0 read. - Add a new helper read_raid56() to handle the rebuild. We will read the full stripe (including all data and P/Q stripes), do the rebuild, then copy only the referred part to the caller. There is a catch for RAID6: we have no way to exhaust all combinations, so the current repair will assume the mirror = 0 data is corrupted, then try to find a missing device. But if no missing device can be found, it will assume P is corrupted. This is just a guess, and can be totally wrong, but we have no better idea. Now btrfs-progs has full read ability for RAID56. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
a99bece1cd
commit
4e9e978783
@ -26,6 +26,7 @@
|
||||
#include "kerncompat.h"
|
||||
#include "kernel-shared/extent_io.h"
|
||||
#include "kernel-lib/list.h"
|
||||
#include "kernel-lib/raid56.h"
|
||||
#include "kernel-shared/ctree.h"
|
||||
#include "kernel-shared/volumes.h"
|
||||
#include "kernel-shared/disk-io.h"
|
||||
@ -788,23 +789,131 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Rebuild the requested range of a RAID5/6 data stripe from the rest of the
 * full stripe and copy the result into @buf.
 *
 * Every stripe listed in @multi is read in full (BTRFS_STRIPE_LEN bytes each),
 * the stripe containing @logical is treated as failed, raid56_recov() is run
 * over the in-memory copies and @len bytes of the recovered stripe are copied
 * out.
 *
 * @fs_info:  filesystem info; only its ->zoned flag is used here, it is passed
 *            through to btrfs_pread()
 * @buf:      destination buffer, must hold at least @len bytes
 * @logical:  logical bytenr of the data to rebuild
 * @len:      number of bytes wanted, must not exceed BTRFS_STRIPE_LEN
 * @mirror:   mirror number of the read, must be > 1 (only checked by ASSERT)
 * @multi:    stripe mapping of the full stripe (devices, physical offsets,
 *            chunk type)
 * @raid_map: raid map of the full stripe; raid_map[0] is used as the logical
 *            start of the full stripe
 *
 * Return 0 on success, -ENOMEM if a buffer cannot be allocated, -EIO if any
 * stripe cannot be read completely.
 */
static int read_raid56(struct btrfs_fs_info *fs_info, void *buf, u64 logical,
		       u64 len, int mirror, struct btrfs_multi_bio *multi,
		       u64 *raid_map)
{
	const int num_stripes = multi->num_stripes;
	u64 full_stripe_start;
	void **pointers = NULL;
	int failed_a = -1;
	int failed_b = -1;
	int i;
	int ret;

	/* Only read repair should go this path */
	ASSERT(mirror > 1);
	ASSERT(raid_map);

	/* The read length should be inside one stripe */
	ASSERT(len <= BTRFS_STRIPE_LEN);

	/* Only touch @raid_map after it has been validated above */
	full_stripe_start = raid_map[0];

	/*
	 * calloc() zeroes the array, so the cleanup path can free() every slot
	 * even when only some of the stripe buffers got allocated.
	 */
	pointers = calloc(num_stripes, sizeof(*pointers));
	if (!pointers) {
		ret = -ENOMEM;
		goto out;
	}

	/* Allocate memory for the full stripe */
	for (i = 0; i < num_stripes; i++) {
		pointers[i] = malloc(BTRFS_STRIPE_LEN);
		if (!pointers[i]) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/*
	 * Read the full stripe.
	 *
	 * The stripes in @multi are not rotated, thus can be used to read from
	 * disk directly.
	 */
	for (i = 0; i < num_stripes; i++) {
		ret = btrfs_pread(multi->stripes[i].dev->fd, pointers[i],
				  BTRFS_STRIPE_LEN, multi->stripes[i].physical,
				  fs_info->zoned);
		if (ret < BTRFS_STRIPE_LEN) {
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Get the failed index.
	 *
	 * Since we're reading using mirror_num > 1 already, it means the data
	 * stripe where @logical lies in is definitely corrupted.
	 */
	failed_a = (logical - full_stripe_start) / BTRFS_STRIPE_LEN;

	/*
	 * For RAID6, we don't have a good way to exhaust all the combinations,
	 * so here we can only go through the map to see if we have missing
	 * devices.
	 *
	 * NOTE(review): the read loop above also reads from devices whose fd
	 * is -1; if that read fails we return -EIO before ever reaching this
	 * scan. Confirm whether missing devices should be skipped when reading.
	 */
	if (multi->type & BTRFS_BLOCK_GROUP_RAID6) {
		for (i = 0; i < num_stripes; i++) {
			/* Skip failed_a, as it's already marked failed */
			if (i == failed_a)
				continue;
			/* Missing dev */
			if (multi->stripes[i].dev->fd == -1) {
				failed_b = i;
				break;
			}
		}
		/*
		 * No missing device, we have no better idea, default to P
		 * corruption
		 */
		if (failed_b < 0)
			failed_b = num_stripes - 2;
	}

	/* Rebuild the full stripe */
	ret = raid56_recov(num_stripes, BTRFS_STRIPE_LEN, multi->type,
			   failed_a, failed_b, pointers);
	ASSERT(ret == 0);

	/* Now copy the data back to original buf */
	memcpy(buf, (char *)pointers[failed_a] +
	       (logical - full_stripe_start) % BTRFS_STRIPE_LEN, len);
	ret = 0;
out:
	/* @pointers itself is NULL when its allocation failed */
	if (pointers) {
		for (i = 0; i < num_stripes; i++)
			free(pointers[i]);
		free(pointers);
	}
	return ret;
}
|
||||
|
||||
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 logical,
|
||||
u64 *len, int mirror)
|
||||
{
|
||||
struct btrfs_multi_bio *multi = NULL;
|
||||
struct btrfs_device *device;
|
||||
u64 read_len = *len;
|
||||
u64 *raid_map = NULL;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_map_block(info, READ, logical, &read_len, &multi, mirror,
|
||||
NULL);
|
||||
&raid_map);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Couldn't map the block %llu\n", logical);
|
||||
return -EIO;
|
||||
}
|
||||
read_len = min(*len, read_len);
|
||||
|
||||
/* We need to rebuild from P/Q */
|
||||
if (mirror > 1 && multi->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
ret = read_raid56(info, buf, logical, read_len, mirror, multi,
|
||||
raid_map);
|
||||
free(multi);
|
||||
free(raid_map);
|
||||
*len = read_len;
|
||||
return ret;
|
||||
}
|
||||
free(raid_map);
|
||||
device = multi->stripes[0].dev;
|
||||
|
||||
read_len = min(*len, read_len);
|
||||
if (device->fd <= 0) {
|
||||
kfree(multi);
|
||||
return -EIO;
|
||||
@ -824,6 +933,7 @@ int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 logical,
|
||||
logical, ret, read_len);
|
||||
return -EIO;
|
||||
}
|
||||
*len = read_len;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1811,6 +1811,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
|
||||
int stripes_required = 1;
|
||||
int stripe_index;
|
||||
int i;
|
||||
bool need_raid_map = false;
|
||||
struct btrfs_multi_bio *multi = NULL;
|
||||
|
||||
if (multi_ret && rw == READ) {
|
||||
@ -1848,17 +1849,18 @@ again:
|
||||
}
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK
|
||||
&& multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
|
||||
/* RAID[56] write or recovery. Return all stripes */
|
||||
stripes_required = map->num_stripes;
|
||||
need_raid_map = true;
|
||||
/* RAID[56] write or recovery. Return all stripes */
|
||||
stripes_required = map->num_stripes;
|
||||
|
||||
/* Only allocate the map if we've already got a large enough multi_ret */
|
||||
if (stripes_allocated >= stripes_required) {
|
||||
raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
|
||||
if (!raid_map) {
|
||||
kfree(multi);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
/* Only allocate the map if we've already got a large enough multi_ret */
|
||||
if (stripes_allocated >= stripes_required) {
|
||||
raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
|
||||
if (!raid_map) {
|
||||
kfree(multi);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if our multi bio struct is too small, back off and try again */
|
||||
@ -1896,6 +1898,7 @@ again:
|
||||
goto out;
|
||||
|
||||
multi->num_stripes = 1;
|
||||
multi->type = map->type;
|
||||
stripe_index = 0;
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
|
||||
if (rw == WRITE)
|
||||
@ -1922,7 +1925,7 @@ again:
|
||||
else if (mirror_num)
|
||||
stripe_index = mirror_num - 1;
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
if (raid_map) {
|
||||
if (need_raid_map && raid_map) {
|
||||
int rot;
|
||||
u64 tmp;
|
||||
u64 raid56_full_stripe_start;
|
||||
|
@ -106,6 +106,7 @@ struct btrfs_bio_stripe {
|
||||
};
|
||||
|
||||
struct btrfs_multi_bio {
|
||||
u64 type;
|
||||
int error;
|
||||
int num_stripes;
|
||||
struct btrfs_bio_stripe stripes[];
|
||||
|
Loading…
Reference in New Issue
Block a user