mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
md updates for 4.3
- An assortment of little fixes, several for minor races only likely to be hit during testing - further cluster-md-raid1 development, not ready for real use yet. - new RAID6 syndrome code for ARM NEON - fix a race where a write can return before failure of one device is properly recorded in metadata, so an immediate crash might result in that write being lost. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJV6rSXAAoJEDnsnt1WYoG5eJIQAJs62+kB3+p87/VEu4hiBgYv yyaCBlTDn3xxy3WFLtvSIc+cZOamvGe/u/+9/aTA5zq30VpS0fwZlLUxwyR3vB7H aXh5y0JL8fViCUp6o+SplOpNDMAv4ntcW5NMv7uWhPLxtQxF/IJu6YLsDRcFaJqL LFCpvKSPgXOQ88ZXHa54xgFgEy+aAh1lxaWQmeqCLtgVc6YhwIsazG00R/vow8Pb 91u3jFioWjBpovTJiRxQO+NGemfOnKrm2EWkR4jzo8taHOouBWOH0RZjh/67dh4p QX4GjMINhFvYSr1UMGXfPm+Fjp2PRgx1qKyR/XhPeXNuE2xZf7T4aCnmKA8DVUA4 vyEl/l0lAZClExNA+bgE/wCrMpvtb9E4NnklzIffDqsDY79m9JzLwznYqDQcXP7m 0zPlRmf8KQoSOVV960N2O6siwQMwvTyPecG0raAv9BKjwZ+7/M8HLOplZuuMsbzT BZ6+FnAIDtc0Id0wwJoARUkghG7Nr4IWi4Q8MtyYLgH9KLnYkomjf/I2B5sEooCF JFIXeg+XX/xKSFHV4TycYdAFMtMEJMJ/pEnbKJ/W7CyAmrHJv+0/U+/gOkA8Mg76 iqYVWqRJHP9ZyWpmaWaaOeGIgFoJqrjM65qFNRcOnzMd/aAi8W63oyM99Lxi+1pm i8StqQBNtiwzds/w32SI =n+/k -----END PGP SIGNATURE----- Merge tag 'md/4.3' of git://neil.brown.name/md Pull md updates from Neil Brown: - an assortment of little fixes, several for minor races only likely to be hit during testing - further cluster-md-raid1 development, not ready for real use yet. - new RAID6 syndrome code for ARM NEON - fix a race where a write can return before failure of one device is properly recorded in metadata, so an immediate crash might result in that write being lost. * tag 'md/4.3' of git://neil.brown.name/md: (33 commits) md/raid5: ensure device failure recorded before write request returns. md/raid5: use bio_list for the list of bios to return. md/raid10: ensure device failure recorded before write request returns. md/raid1: ensure device failure recorded before write request returns. md-cluster: remove inappropriate try_module_get from join() md: extend spinlock protection in register_md_cluster_operations md-cluster: Read the disk bitmap sb and check if it needs recovery md-cluster: only call complete(&cinfo->completion) when node join cluster md-cluster: add missed lockres_free md-cluster: remove the unused sb_lock md-cluster: init suspend_list and suspend_lock early in join md-cluster: add the error check if failed to get dlm lock md-cluster: init completion within lockres_init md-cluster: fix deadlock issue on message lock md-cluster: transfer the resync ownership to another node md-cluster: split recover_slot for future code reuse md-cluster: use %pU to print UUIDs md: setup safemode_timer before it's being used md/raid5: handle possible race as reshape completes. md: sync sync_completed has correct value as recovery finishes. ...
This commit is contained in:
commit
2a013e37ce
@ -91,7 +91,7 @@ The algorithm is:
|
||||
this message inappropriate or redundant.
|
||||
|
||||
3. sender write LVB.
|
||||
sender down-convert MESSAGE from EX to CR
|
||||
sender down-convert MESSAGE from EX to CW
|
||||
sender try to get EX of ACK
|
||||
[ wait until all receiver has *processed* the MESSAGE ]
|
||||
|
||||
@ -112,7 +112,7 @@ The algorithm is:
|
||||
sender down-convert ACK from EX to CR
|
||||
sender release MESSAGE
|
||||
sender release TOKEN
|
||||
receiver upconvert to EX of MESSAGE
|
||||
receiver upconvert to PR of MESSAGE
|
||||
receiver get CR of ACK
|
||||
receiver release MESSAGE
|
||||
|
||||
|
@ -45,6 +45,7 @@ struct resync_info {
|
||||
/* md_cluster_info flags */
|
||||
#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
|
||||
#define MD_CLUSTER_SUSPEND_READ_BALANCING 2
|
||||
#define MD_CLUSTER_BEGIN_JOIN_CLUSTER 3
|
||||
|
||||
|
||||
struct md_cluster_info {
|
||||
@ -52,7 +53,6 @@ struct md_cluster_info {
|
||||
dlm_lockspace_t *lockspace;
|
||||
int slot_number;
|
||||
struct completion completion;
|
||||
struct dlm_lock_resource *sb_lock;
|
||||
struct mutex sb_mutex;
|
||||
struct dlm_lock_resource *bitmap_lockres;
|
||||
struct list_head suspend_list;
|
||||
@ -75,6 +75,7 @@ enum msg_type {
|
||||
NEWDISK,
|
||||
REMOVE,
|
||||
RE_ADD,
|
||||
BITMAP_NEEDS_SYNC,
|
||||
};
|
||||
|
||||
struct cluster_msg {
|
||||
@ -99,7 +100,6 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
init_completion(&res->completion);
|
||||
ret = dlm_lock(res->ls, mode, &res->lksb,
|
||||
res->flags, res->name, strlen(res->name),
|
||||
0, sync_ast, res, res->bast);
|
||||
@ -124,6 +124,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
|
||||
res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
|
||||
if (!res)
|
||||
return NULL;
|
||||
init_completion(&res->completion);
|
||||
res->ls = cinfo->lockspace;
|
||||
res->mddev = mddev;
|
||||
namelen = strlen(name);
|
||||
@ -165,11 +166,24 @@ out_err:
|
||||
|
||||
static void lockres_free(struct dlm_lock_resource *res)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!res)
|
||||
return;
|
||||
|
||||
init_completion(&res->completion);
|
||||
dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
|
||||
/* cancel a lock request or a conversion request that is blocked */
|
||||
res->flags |= DLM_LKF_CANCEL;
|
||||
retry:
|
||||
ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
|
||||
if (unlikely(ret != 0)) {
|
||||
pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret);
|
||||
|
||||
/* if a lock conversion is cancelled, then the lock is put
|
||||
* back to grant queue, need to ensure it is unlocked */
|
||||
if (ret == -DLM_ECANCEL)
|
||||
goto retry;
|
||||
}
|
||||
res->flags &= ~DLM_LKF_CANCEL;
|
||||
wait_for_completion(&res->completion);
|
||||
|
||||
kfree(res->name);
|
||||
@ -177,18 +191,6 @@ static void lockres_free(struct dlm_lock_resource *res)
|
||||
kfree(res);
|
||||
}
|
||||
|
||||
static char *pretty_uuid(char *dest, char *src)
|
||||
{
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (i == 4 || i == 6 || i == 8 || i == 10)
|
||||
len += sprintf(dest + len, "-");
|
||||
len += sprintf(dest + len, "%02x", (__u8)src[i]);
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
|
||||
sector_t lo, sector_t hi)
|
||||
{
|
||||
@ -281,16 +283,11 @@ static void recover_prep(void *arg)
|
||||
set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
|
||||
}
|
||||
|
||||
static void recover_slot(void *arg, struct dlm_slot *slot)
|
||||
static void __recover_slot(struct mddev *mddev, int slot)
|
||||
{
|
||||
struct mddev *mddev = arg;
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
|
||||
pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
|
||||
mddev->bitmap_info.cluster_name,
|
||||
slot->nodeid, slot->slot,
|
||||
cinfo->slot_number);
|
||||
set_bit(slot->slot - 1, &cinfo->recovery_map);
|
||||
set_bit(slot, &cinfo->recovery_map);
|
||||
if (!cinfo->recovery_thread) {
|
||||
cinfo->recovery_thread = md_register_thread(recover_bitmaps,
|
||||
mddev, "recover");
|
||||
@ -302,6 +299,20 @@ static void recover_slot(void *arg, struct dlm_slot *slot)
|
||||
md_wakeup_thread(cinfo->recovery_thread);
|
||||
}
|
||||
|
||||
static void recover_slot(void *arg, struct dlm_slot *slot)
|
||||
{
|
||||
struct mddev *mddev = arg;
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
|
||||
pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
|
||||
mddev->bitmap_info.cluster_name,
|
||||
slot->nodeid, slot->slot,
|
||||
cinfo->slot_number);
|
||||
/* deduct one since dlm slot starts from one while the num of
|
||||
* cluster-md begins with 0 */
|
||||
__recover_slot(mddev, slot->slot - 1);
|
||||
}
|
||||
|
||||
static void recover_done(void *arg, struct dlm_slot *slots,
|
||||
int num_slots, int our_slot,
|
||||
uint32_t generation)
|
||||
@ -310,10 +321,17 @@ static void recover_done(void *arg, struct dlm_slot *slots,
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
|
||||
cinfo->slot_number = our_slot;
|
||||
complete(&cinfo->completion);
|
||||
/* completion is only need to be complete when node join cluster,
|
||||
* it doesn't need to run during another node's failure */
|
||||
if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
|
||||
complete(&cinfo->completion);
|
||||
clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
|
||||
}
|
||||
clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
|
||||
}
|
||||
|
||||
/* the ops is called when node join the cluster, and do lock recovery
|
||||
* if node failure occurs */
|
||||
static const struct dlm_lockspace_ops md_ls_ops = {
|
||||
.recover_prep = recover_prep,
|
||||
.recover_slot = recover_slot,
|
||||
@ -388,7 +406,7 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
|
||||
int len;
|
||||
|
||||
len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
|
||||
pretty_uuid(disk_uuid + len, cmsg->uuid);
|
||||
sprintf(disk_uuid + len, "%pU", cmsg->uuid);
|
||||
snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
|
||||
pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
|
||||
init_completion(&cinfo->newdisk_completion);
|
||||
@ -457,6 +475,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
|
||||
__func__, __LINE__, msg->slot);
|
||||
process_readd_disk(mddev, msg);
|
||||
break;
|
||||
case BITMAP_NEEDS_SYNC:
|
||||
pr_info("%s: %d Received BITMAP_NEEDS_SYNC from %d\n",
|
||||
__func__, __LINE__, msg->slot);
|
||||
__recover_slot(mddev, msg->slot);
|
||||
break;
|
||||
default:
|
||||
pr_warn("%s:%d Received unknown message from %d\n",
|
||||
__func__, __LINE__, msg->slot);
|
||||
@ -472,6 +495,7 @@ static void recv_daemon(struct md_thread *thread)
|
||||
struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
|
||||
struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
|
||||
struct cluster_msg msg;
|
||||
int ret;
|
||||
|
||||
/*get CR on Message*/
|
||||
if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
|
||||
@ -484,13 +508,21 @@ static void recv_daemon(struct md_thread *thread)
|
||||
process_recvd_msg(thread->mddev, &msg);
|
||||
|
||||
/*release CR on ack_lockres*/
|
||||
dlm_unlock_sync(ack_lockres);
|
||||
/*up-convert to EX on message_lockres*/
|
||||
dlm_lock_sync(message_lockres, DLM_LOCK_EX);
|
||||
ret = dlm_unlock_sync(ack_lockres);
|
||||
if (unlikely(ret != 0))
|
||||
pr_info("unlock ack failed return %d\n", ret);
|
||||
/*up-convert to PR on message_lockres*/
|
||||
ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
|
||||
if (unlikely(ret != 0))
|
||||
pr_info("lock PR on msg failed return %d\n", ret);
|
||||
/*get CR on ack_lockres again*/
|
||||
dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
|
||||
ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
|
||||
if (unlikely(ret != 0))
|
||||
pr_info("lock CR on ack failed return %d\n", ret);
|
||||
/*release CR on message_lockres*/
|
||||
dlm_unlock_sync(message_lockres);
|
||||
ret = dlm_unlock_sync(message_lockres);
|
||||
if (unlikely(ret != 0))
|
||||
pr_info("unlock msg failed return %d\n", ret);
|
||||
}
|
||||
|
||||
/* lock_comm()
|
||||
@ -519,7 +551,7 @@ static void unlock_comm(struct md_cluster_info *cinfo)
|
||||
* The function:
|
||||
* 1. Grabs the message lockresource in EX mode
|
||||
* 2. Copies the message to the message LVB
|
||||
* 3. Downconverts message lockresource to CR
|
||||
* 3. Downconverts message lockresource to CW
|
||||
* 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
|
||||
* and the other nodes read the message. The thread will wait here until all other
|
||||
* nodes have released ack lock resource.
|
||||
@ -540,12 +572,12 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
|
||||
|
||||
memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
|
||||
sizeof(struct cluster_msg));
|
||||
/*down-convert EX to CR on Message*/
|
||||
error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
|
||||
/*down-convert EX to CW on Message*/
|
||||
error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CW);
|
||||
if (error) {
|
||||
pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
|
||||
pr_err("md-cluster: failed to convert EX to CW on MESSAGE(%d)\n",
|
||||
error);
|
||||
goto failed_message;
|
||||
goto failed_ack;
|
||||
}
|
||||
|
||||
/*up-convert CR to EX on Ack*/
|
||||
@ -565,7 +597,13 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
|
||||
}
|
||||
|
||||
failed_ack:
|
||||
dlm_unlock_sync(cinfo->message_lockres);
|
||||
error = dlm_unlock_sync(cinfo->message_lockres);
|
||||
if (unlikely(error != 0)) {
|
||||
pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
|
||||
error);
|
||||
/* in case the message can't be released due to some reason */
|
||||
goto failed_ack;
|
||||
}
|
||||
failed_message:
|
||||
return error;
|
||||
}
|
||||
@ -587,6 +625,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
|
||||
struct dlm_lock_resource *bm_lockres;
|
||||
struct suspend_info *s;
|
||||
char str[64];
|
||||
sector_t lo, hi;
|
||||
|
||||
|
||||
for (i = 0; i < total_slots; i++) {
|
||||
@ -617,9 +656,24 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
|
||||
lockres_free(bm_lockres);
|
||||
continue;
|
||||
}
|
||||
if (ret)
|
||||
if (ret) {
|
||||
lockres_free(bm_lockres);
|
||||
goto out;
|
||||
/* TODO: Read the disk bitmap sb and check if it needs recovery */
|
||||
}
|
||||
|
||||
/* Read the disk bitmap sb and check if it needs recovery */
|
||||
ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
|
||||
if (ret) {
|
||||
pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
|
||||
lockres_free(bm_lockres);
|
||||
continue;
|
||||
}
|
||||
if ((hi > 0) && (lo < mddev->recovery_cp)) {
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
mddev->recovery_cp = lo;
|
||||
md_check_recovery(mddev);
|
||||
}
|
||||
|
||||
dlm_unlock_sync(bm_lockres);
|
||||
lockres_free(bm_lockres);
|
||||
}
|
||||
@ -633,20 +687,20 @@ static int join(struct mddev *mddev, int nodes)
|
||||
int ret, ops_rv;
|
||||
char str[64];
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
return -ENOENT;
|
||||
|
||||
cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
|
||||
if (!cinfo)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&cinfo->suspend_list);
|
||||
spin_lock_init(&cinfo->suspend_lock);
|
||||
init_completion(&cinfo->completion);
|
||||
set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
|
||||
|
||||
mutex_init(&cinfo->sb_mutex);
|
||||
mddev->cluster_info = cinfo;
|
||||
|
||||
memset(str, 0, 64);
|
||||
pretty_uuid(str, mddev->uuid);
|
||||
sprintf(str, "%pU", mddev->uuid);
|
||||
ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
|
||||
DLM_LSFL_FS, LVB_SIZE,
|
||||
&md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
|
||||
@ -659,12 +713,6 @@ static int join(struct mddev *mddev, int nodes)
|
||||
ret = -ERANGE;
|
||||
goto err;
|
||||
}
|
||||
cinfo->sb_lock = lockres_init(mddev, "cmd-super",
|
||||
NULL, 0);
|
||||
if (!cinfo->sb_lock) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
/* Initiate the communication resources */
|
||||
ret = -ENOMEM;
|
||||
cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
|
||||
@ -705,9 +753,6 @@ static int join(struct mddev *mddev, int nodes)
|
||||
goto err;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cinfo->suspend_list);
|
||||
spin_lock_init(&cinfo->suspend_lock);
|
||||
|
||||
ret = gather_all_resync_info(mddev, nodes);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -719,12 +764,10 @@ err:
|
||||
lockres_free(cinfo->ack_lockres);
|
||||
lockres_free(cinfo->no_new_dev_lockres);
|
||||
lockres_free(cinfo->bitmap_lockres);
|
||||
lockres_free(cinfo->sb_lock);
|
||||
if (cinfo->lockspace)
|
||||
dlm_release_lockspace(cinfo->lockspace, 2);
|
||||
mddev->cluster_info = NULL;
|
||||
kfree(cinfo);
|
||||
module_put(THIS_MODULE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -740,7 +783,6 @@ static int leave(struct mddev *mddev)
|
||||
lockres_free(cinfo->token_lockres);
|
||||
lockres_free(cinfo->ack_lockres);
|
||||
lockres_free(cinfo->no_new_dev_lockres);
|
||||
lockres_free(cinfo->sb_lock);
|
||||
lockres_free(cinfo->bitmap_lockres);
|
||||
dlm_release_lockspace(cinfo->lockspace, 2);
|
||||
return 0;
|
||||
@ -817,8 +859,17 @@ static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
|
||||
|
||||
static void resync_finish(struct mddev *mddev)
|
||||
{
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
struct cluster_msg cmsg;
|
||||
int slot = cinfo->slot_number - 1;
|
||||
|
||||
pr_info("%s:%d\n", __func__, __LINE__);
|
||||
resync_send(mddev, RESYNCING, 0, 0);
|
||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
|
||||
cmsg.slot = cpu_to_le32(slot);
|
||||
sendmsg(cinfo, &cmsg);
|
||||
}
|
||||
}
|
||||
|
||||
static int area_resyncing(struct mddev *mddev, int direction,
|
||||
|
110
drivers/md/md.c
110
drivers/md/md.c
@ -483,6 +483,8 @@ static void mddev_put(struct mddev *mddev)
|
||||
bioset_free(bs);
|
||||
}
|
||||
|
||||
static void md_safemode_timeout(unsigned long data);
|
||||
|
||||
void mddev_init(struct mddev *mddev)
|
||||
{
|
||||
mutex_init(&mddev->open_mutex);
|
||||
@ -490,7 +492,8 @@ void mddev_init(struct mddev *mddev)
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&mddev->disks);
|
||||
INIT_LIST_HEAD(&mddev->all_mddevs);
|
||||
init_timer(&mddev->safemode_timer);
|
||||
setup_timer(&mddev->safemode_timer, md_safemode_timeout,
|
||||
(unsigned long) mddev);
|
||||
atomic_set(&mddev->active, 1);
|
||||
atomic_set(&mddev->openers, 0);
|
||||
atomic_set(&mddev->active_io, 0);
|
||||
@ -3255,8 +3258,6 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void md_safemode_timeout(unsigned long data);
|
||||
|
||||
static ssize_t
|
||||
safe_delay_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
@ -4189,6 +4190,8 @@ action_show(struct mddev *mddev, char *page)
|
||||
type = "repair";
|
||||
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
type = "recover";
|
||||
else if (mddev->reshape_position != MaxSector)
|
||||
type = "reshape";
|
||||
}
|
||||
return sprintf(page, "%s\n", type);
|
||||
}
|
||||
@ -5180,8 +5183,6 @@ int md_run(struct mddev *mddev)
|
||||
atomic_set(&mddev->max_corr_read_errors,
|
||||
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
|
||||
mddev->safemode = 0;
|
||||
mddev->safemode_timer.function = md_safemode_timeout;
|
||||
mddev->safemode_timer.data = (unsigned long) mddev;
|
||||
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
|
||||
mddev->in_sync = 1;
|
||||
smp_wmb();
|
||||
@ -5194,6 +5195,11 @@ int md_run(struct mddev *mddev)
|
||||
if (sysfs_link_rdev(mddev, rdev))
|
||||
/* failure here is OK */;
|
||||
|
||||
if (mddev->degraded && !mddev->ro)
|
||||
/* This ensures that recovering status is reported immediately
|
||||
* via sysfs - until a lack of spares is confirmed.
|
||||
*/
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
|
||||
if (mddev->flags & MD_UPDATE_SB_FLAGS)
|
||||
@ -5741,16 +5747,16 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
|
||||
|
||||
err = 0;
|
||||
spin_lock(&mddev->lock);
|
||||
/* bitmap disabled, zero the first byte and copy out */
|
||||
if (!mddev->bitmap_info.file)
|
||||
file->pathname[0] = '\0';
|
||||
else if ((ptr = file_path(mddev->bitmap_info.file,
|
||||
file->pathname, sizeof(file->pathname))),
|
||||
IS_ERR(ptr))
|
||||
err = PTR_ERR(ptr);
|
||||
else
|
||||
memmove(file->pathname, ptr,
|
||||
sizeof(file->pathname)-(ptr-file->pathname));
|
||||
/* bitmap enabled */
|
||||
if (mddev->bitmap_info.file) {
|
||||
ptr = file_path(mddev->bitmap_info.file, file->pathname,
|
||||
sizeof(file->pathname));
|
||||
if (IS_ERR(ptr))
|
||||
err = PTR_ERR(ptr);
|
||||
else
|
||||
memmove(file->pathname, ptr,
|
||||
sizeof(file->pathname)-(ptr-file->pathname));
|
||||
}
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
if (err == 0 &&
|
||||
@ -7069,7 +7075,7 @@ static void status_unused(struct seq_file *seq)
|
||||
seq_printf(seq, "\n");
|
||||
}
|
||||
|
||||
static void status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
static int status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
{
|
||||
sector_t max_sectors, resync, res;
|
||||
unsigned long dt, db;
|
||||
@ -7077,18 +7083,32 @@ static void status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
int scale;
|
||||
unsigned int per_milli;
|
||||
|
||||
if (mddev->curr_resync <= 3)
|
||||
resync = 0;
|
||||
else
|
||||
resync = mddev->curr_resync
|
||||
- atomic_read(&mddev->recovery_active);
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
max_sectors = mddev->resync_max_sectors;
|
||||
else
|
||||
max_sectors = mddev->dev_sectors;
|
||||
|
||||
resync = mddev->curr_resync;
|
||||
if (resync <= 3) {
|
||||
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
|
||||
/* Still cleaning up */
|
||||
resync = max_sectors;
|
||||
} else
|
||||
resync -= atomic_read(&mddev->recovery_active);
|
||||
|
||||
if (resync == 0) {
|
||||
if (mddev->recovery_cp < MaxSector) {
|
||||
seq_printf(seq, "\tresync=PENDING");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (resync < 3) {
|
||||
seq_printf(seq, "\tresync=DELAYED");
|
||||
return 1;
|
||||
}
|
||||
|
||||
WARN_ON(max_sectors == 0);
|
||||
/* Pick 'scale' such that (resync>>scale)*1000 will fit
|
||||
* in a sector_t, and (max_sectors>>scale) will fit in a
|
||||
@ -7153,6 +7173,7 @@ static void status_resync(struct seq_file *seq, struct mddev *mddev)
|
||||
((unsigned long)rt % 60)/6);
|
||||
|
||||
seq_printf(seq, " speed=%ldK/sec", db/2/dt);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void *md_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
@ -7298,13 +7319,8 @@ static int md_seq_show(struct seq_file *seq, void *v)
|
||||
mddev->pers->status(seq, mddev);
|
||||
seq_printf(seq, "\n ");
|
||||
if (mddev->pers->sync_request) {
|
||||
if (mddev->curr_resync > 2) {
|
||||
status_resync(seq, mddev);
|
||||
if (status_resync(seq, mddev))
|
||||
seq_printf(seq, "\n ");
|
||||
} else if (mddev->curr_resync >= 1)
|
||||
seq_printf(seq, "\tresync=DELAYED\n ");
|
||||
else if (mddev->recovery_cp < MaxSector)
|
||||
seq_printf(seq, "\tresync=PENDING\n ");
|
||||
}
|
||||
} else
|
||||
seq_printf(seq, "\n ");
|
||||
@ -7387,15 +7403,19 @@ int unregister_md_personality(struct md_personality *p)
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_md_personality);
|
||||
|
||||
int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
|
||||
int register_md_cluster_operations(struct md_cluster_operations *ops,
|
||||
struct module *module)
|
||||
{
|
||||
if (md_cluster_ops != NULL)
|
||||
return -EALREADY;
|
||||
int ret = 0;
|
||||
spin_lock(&pers_lock);
|
||||
md_cluster_ops = ops;
|
||||
md_cluster_mod = module;
|
||||
if (md_cluster_ops != NULL)
|
||||
ret = -EALREADY;
|
||||
else {
|
||||
md_cluster_ops = ops;
|
||||
md_cluster_mod = module;
|
||||
}
|
||||
spin_unlock(&pers_lock);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(register_md_cluster_operations);
|
||||
|
||||
@ -7793,7 +7813,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
> (max_sectors >> 4)) ||
|
||||
time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
|
||||
(j - mddev->curr_resync_completed)*2
|
||||
>= mddev->resync_max - mddev->curr_resync_completed
|
||||
>= mddev->resync_max - mddev->curr_resync_completed ||
|
||||
mddev->curr_resync_completed > mddev->resync_max
|
||||
)) {
|
||||
/* time to update curr_resync_completed */
|
||||
wait_event(mddev->recovery_wait,
|
||||
@ -7838,6 +7859,9 @@ void md_do_sync(struct md_thread *thread)
|
||||
break;
|
||||
|
||||
j += sectors;
|
||||
if (j > max_sectors)
|
||||
/* when skipping, extra large numbers can be returned. */
|
||||
j = max_sectors;
|
||||
if (j > 2)
|
||||
mddev->curr_resync = j;
|
||||
if (mddev_is_clustered(mddev))
|
||||
@ -7906,12 +7930,15 @@ void md_do_sync(struct md_thread *thread)
|
||||
blk_finish_plug(&plug);
|
||||
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
|
||||
|
||||
if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
mddev->curr_resync > 2) {
|
||||
mddev->curr_resync_completed = mddev->curr_resync;
|
||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||
}
|
||||
/* tell personality that we are finished */
|
||||
mddev->pers->sync_request(mddev, max_sectors, &skipped);
|
||||
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->resync_finish(mddev);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
|
||||
mddev->curr_resync > 2) {
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
@ -7945,6 +7972,9 @@ void md_do_sync(struct md_thread *thread)
|
||||
}
|
||||
}
|
||||
skip:
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->resync_finish(mddev);
|
||||
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
|
||||
spin_lock(&mddev->lock);
|
||||
@ -7955,11 +7985,11 @@ void md_do_sync(struct md_thread *thread)
|
||||
mddev->resync_max = MaxSector;
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
mddev->resync_min = mddev->curr_resync_completed;
|
||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
mddev->curr_resync = 0;
|
||||
spin_unlock(&mddev->lock);
|
||||
|
||||
wake_up(&resync_wait);
|
||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
return;
|
||||
}
|
||||
@ -8128,6 +8158,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
goto unlock;
|
||||
}
|
||||
@ -8574,6 +8605,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
/* Make sure they get written out promptly */
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
|
||||
set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
}
|
||||
return rv;
|
||||
|
@ -83,7 +83,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
char b[BDEVNAME_SIZE];
|
||||
char b2[BDEVNAME_SIZE];
|
||||
struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
|
||||
bool discard_supported = false;
|
||||
unsigned short blksize = 512;
|
||||
|
||||
if (!conf)
|
||||
return -ENOMEM;
|
||||
@ -98,6 +98,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
sector_div(sectors, mddev->chunk_sectors);
|
||||
rdev1->sectors = sectors * mddev->chunk_sectors;
|
||||
|
||||
blksize = max(blksize, queue_logical_block_size(
|
||||
rdev1->bdev->bd_disk->queue));
|
||||
|
||||
rdev_for_each(rdev2, mddev) {
|
||||
pr_debug("md/raid0:%s: comparing %s(%llu)"
|
||||
" with %s(%llu)\n",
|
||||
@ -134,6 +137,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
}
|
||||
pr_debug("md/raid0:%s: FINAL %d zones\n",
|
||||
mdname(mddev), conf->nr_strip_zones);
|
||||
/*
|
||||
* now since we have the hard sector sizes, we can make sure
|
||||
* chunk size is a multiple of that sector size
|
||||
*/
|
||||
if ((mddev->chunk_sectors << 9) % blksize) {
|
||||
printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
|
||||
mdname(mddev),
|
||||
mddev->chunk_sectors << 9, blksize);
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
|
||||
conf->nr_strip_zones, GFP_KERNEL);
|
||||
@ -188,16 +203,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
}
|
||||
dev[j] = rdev1;
|
||||
|
||||
if (mddev->queue)
|
||||
disk_stack_limits(mddev->gendisk, rdev1->bdev,
|
||||
rdev1->data_offset << 9);
|
||||
|
||||
if (!smallest || (rdev1->sectors < smallest->sectors))
|
||||
smallest = rdev1;
|
||||
cnt++;
|
||||
|
||||
if (blk_queue_discard(bdev_get_queue(rdev1->bdev)))
|
||||
discard_supported = true;
|
||||
}
|
||||
if (cnt != mddev->raid_disks) {
|
||||
printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
|
||||
@ -258,28 +266,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
(unsigned long long)smallest->sectors);
|
||||
}
|
||||
|
||||
/*
|
||||
* now since we have the hard sector sizes, we can make sure
|
||||
* chunk size is a multiple of that sector size
|
||||
*/
|
||||
if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
|
||||
printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
|
||||
mdname(mddev),
|
||||
mddev->chunk_sectors << 9);
|
||||
goto abort;
|
||||
}
|
||||
|
||||
if (mddev->queue) {
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
blk_queue_io_opt(mddev->queue,
|
||||
(mddev->chunk_sectors << 9) * mddev->raid_disks);
|
||||
|
||||
if (!discard_supported)
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
}
|
||||
|
||||
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
|
||||
*private_conf = conf;
|
||||
|
||||
@ -378,12 +364,6 @@ static int raid0_run(struct mddev *mddev)
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
||||
if (mddev->queue) {
|
||||
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
}
|
||||
|
||||
/* if private is not null, we are here after takeover */
|
||||
if (mddev->private == NULL) {
|
||||
ret = create_strip_zones(mddev, &conf);
|
||||
@ -392,6 +372,29 @@ static int raid0_run(struct mddev *mddev)
|
||||
mddev->private = conf;
|
||||
}
|
||||
conf = mddev->private;
|
||||
if (mddev->queue) {
|
||||
struct md_rdev *rdev;
|
||||
bool discard_supported = false;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
discard_supported = true;
|
||||
}
|
||||
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
|
||||
|
||||
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
|
||||
blk_queue_io_opt(mddev->queue,
|
||||
(mddev->chunk_sectors << 9) * mddev->raid_disks);
|
||||
|
||||
if (!discard_supported)
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
}
|
||||
|
||||
/* calculate array device size */
|
||||
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
|
||||
|
@ -1474,6 +1474,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
set_bit(MD_CHANGE_PENDING, &mddev->flags);
|
||||
printk(KERN_ALERT
|
||||
"md/raid1:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid1:%s: Operation continuing on %d devices.\n",
|
||||
@ -2235,6 +2236,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
|
||||
static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
{
|
||||
int m;
|
||||
bool fail = false;
|
||||
for (m = 0; m < conf->raid_disks * 2 ; m++)
|
||||
if (r1_bio->bios[m] == IO_MADE_GOOD) {
|
||||
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
||||
@ -2247,6 +2249,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
* narrow down and record precise write
|
||||
* errors.
|
||||
*/
|
||||
fail = true;
|
||||
if (!narrow_write_error(r1_bio, m)) {
|
||||
md_error(conf->mddev,
|
||||
conf->mirrors[m].rdev);
|
||||
@ -2258,7 +2261,13 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
}
|
||||
if (test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||
close_write(r1_bio);
|
||||
raid_end_bio_io(r1_bio);
|
||||
if (fail) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
} else
|
||||
raid_end_bio_io(r1_bio);
|
||||
}
|
||||
|
||||
static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
@ -2364,6 +2373,23 @@ static void raid1d(struct md_thread *thread)
|
||||
|
||||
md_check_recovery(mddev);
|
||||
|
||||
if (!list_empty_careful(&conf->bio_end_io_list) &&
|
||||
!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
LIST_HEAD(tmp);
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
list_add(&tmp, &conf->bio_end_io_list);
|
||||
list_del_init(&conf->bio_end_io_list);
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
while (!list_empty(&tmp)) {
|
||||
r1_bio = list_first_entry(&conf->bio_end_io_list,
|
||||
struct r1bio, retry_list);
|
||||
list_del(&r1_bio->retry_list);
|
||||
raid_end_bio_io(r1_bio);
|
||||
}
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
for (;;) {
|
||||
|
||||
@ -2763,6 +2789,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
conf->mddev = mddev;
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
INIT_LIST_HEAD(&conf->bio_end_io_list);
|
||||
|
||||
spin_lock_init(&conf->resync_lock);
|
||||
init_waitqueue_head(&conf->wait_barrier);
|
||||
@ -3057,6 +3084,7 @@ static int raid1_reshape(struct mddev *mddev)
|
||||
|
||||
unfreeze_array(conf);
|
||||
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
|
||||
|
@ -61,6 +61,11 @@ struct r1conf {
|
||||
* block, or anything else.
|
||||
*/
|
||||
struct list_head retry_list;
|
||||
/* A separate list of r1bio which just need raid_end_bio_io called.
|
||||
* This mustn't happen for writes which had any errors if the superblock
|
||||
* needs to be written.
|
||||
*/
|
||||
struct list_head bio_end_io_list;
|
||||
|
||||
/* queue pending writes to be submitted on unplug */
|
||||
struct bio_list pending_bio_list;
|
||||
|
@ -1589,6 +1589,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
set_bit(MD_CHANGE_PENDING, &mddev->flags);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
printk(KERN_ALERT
|
||||
"md/raid10:%s: Disk failure on %s, disabling device.\n"
|
||||
@ -2623,6 +2624,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
}
|
||||
put_buf(r10_bio);
|
||||
} else {
|
||||
bool fail = false;
|
||||
for (m = 0; m < conf->copies; m++) {
|
||||
int dev = r10_bio->devs[m].devnum;
|
||||
struct bio *bio = r10_bio->devs[m].bio;
|
||||
@ -2634,6 +2636,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
r10_bio->sectors, 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
} else if (bio != NULL && bio->bi_error) {
|
||||
fail = true;
|
||||
if (!narrow_write_error(r10_bio, m)) {
|
||||
md_error(conf->mddev, rdev);
|
||||
set_bit(R10BIO_Degraded,
|
||||
@ -2654,7 +2657,13 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
if (test_bit(R10BIO_WriteError,
|
||||
&r10_bio->state))
|
||||
close_write(r10_bio);
|
||||
raid_end_bio_io(r10_bio);
|
||||
if (fail) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
} else
|
||||
raid_end_bio_io(r10_bio);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2669,6 +2678,23 @@ static void raid10d(struct md_thread *thread)
|
||||
|
||||
md_check_recovery(mddev);
|
||||
|
||||
if (!list_empty_careful(&conf->bio_end_io_list) &&
|
||||
!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
LIST_HEAD(tmp);
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
list_add(&tmp, &conf->bio_end_io_list);
|
||||
list_del_init(&conf->bio_end_io_list);
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
while (!list_empty(&tmp)) {
|
||||
r10_bio = list_first_entry(&conf->bio_end_io_list,
|
||||
struct r10bio, retry_list);
|
||||
list_del(&r10_bio->retry_list);
|
||||
raid_end_bio_io(r10_bio);
|
||||
}
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
for (;;) {
|
||||
|
||||
@ -3443,6 +3469,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
conf->reshape_safe = conf->reshape_progress;
|
||||
spin_lock_init(&conf->device_lock);
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
INIT_LIST_HEAD(&conf->bio_end_io_list);
|
||||
|
||||
spin_lock_init(&conf->resync_lock);
|
||||
init_waitqueue_head(&conf->wait_barrier);
|
||||
@ -4097,7 +4124,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
||||
* at a time, possibly less if that exceeds RESYNC_PAGES,
|
||||
* or we hit a bad block or something.
|
||||
* This might mean we pause for normal IO in the middle of
|
||||
* a chunk, but that is not a problem was mddev->reshape_position
|
||||
* a chunk, but that is not a problem as mddev->reshape_position
|
||||
* can record any location.
|
||||
*
|
||||
* If we will want to write to a location that isn't
|
||||
@ -4121,7 +4148,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
||||
*
|
||||
* In all this the minimum difference in data offsets
|
||||
* (conf->offset_diff - always positive) allows a bit of slack,
|
||||
* so next can be after 'safe', but not by more than offset_disk
|
||||
* so next can be after 'safe', but not by more than offset_diff
|
||||
*
|
||||
* We need to prepare all the bios here before we start any IO
|
||||
* to ensure the size we choose is acceptable to all devices.
|
||||
|
@ -53,6 +53,12 @@ struct r10conf {
|
||||
sector_t offset_diff;
|
||||
|
||||
struct list_head retry_list;
|
||||
/* A separate list of r1bio which just need raid_end_bio_io called.
|
||||
* This mustn't happen for writes which had any errors if the superblock
|
||||
* needs to be written.
|
||||
*/
|
||||
struct list_head bio_end_io_list;
|
||||
|
||||
/* queue pending writes and submit them on unplug */
|
||||
struct bio_list pending_bio_list;
|
||||
int pending_count;
|
||||
|
@ -223,18 +223,14 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
|
||||
return slot;
|
||||
}
|
||||
|
||||
static void return_io(struct bio *return_bi)
|
||||
static void return_io(struct bio_list *return_bi)
|
||||
{
|
||||
struct bio *bi = return_bi;
|
||||
while (bi) {
|
||||
|
||||
return_bi = bi->bi_next;
|
||||
bi->bi_next = NULL;
|
||||
struct bio *bi;
|
||||
while ((bi = bio_list_pop(return_bi)) != NULL) {
|
||||
bi->bi_iter.bi_size = 0;
|
||||
trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
|
||||
bi, 0);
|
||||
bio_endio(bi);
|
||||
bi = return_bi;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1177,7 +1173,7 @@ async_copy_data(int frombio, struct bio *bio, struct page **page,
|
||||
static void ops_complete_biofill(void *stripe_head_ref)
|
||||
{
|
||||
struct stripe_head *sh = stripe_head_ref;
|
||||
struct bio *return_bi = NULL;
|
||||
struct bio_list return_bi = BIO_EMPTY_LIST;
|
||||
int i;
|
||||
|
||||
pr_debug("%s: stripe %llu\n", __func__,
|
||||
@ -1201,17 +1197,15 @@ static void ops_complete_biofill(void *stripe_head_ref)
|
||||
while (rbi && rbi->bi_iter.bi_sector <
|
||||
dev->sector + STRIPE_SECTORS) {
|
||||
rbi2 = r5_next_bio(rbi, dev->sector);
|
||||
if (!raid5_dec_bi_active_stripes(rbi)) {
|
||||
rbi->bi_next = return_bi;
|
||||
return_bi = rbi;
|
||||
}
|
||||
if (!raid5_dec_bi_active_stripes(rbi))
|
||||
bio_list_add(&return_bi, rbi);
|
||||
rbi = rbi2;
|
||||
}
|
||||
}
|
||||
}
|
||||
clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
|
||||
|
||||
return_io(return_bi);
|
||||
return_io(&return_bi);
|
||||
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
release_stripe(sh);
|
||||
@ -2517,6 +2511,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
set_bit(MD_CHANGE_PENDING, &mddev->flags);
|
||||
printk(KERN_ALERT
|
||||
"md/raid:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid:%s: Operation continuing on %d devices.\n",
|
||||
@ -3069,7 +3064,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
|
||||
static void
|
||||
handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct stripe_head_state *s, int disks,
|
||||
struct bio **return_bi)
|
||||
struct bio_list *return_bi)
|
||||
{
|
||||
int i;
|
||||
BUG_ON(sh->batch_head);
|
||||
@ -3114,8 +3109,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bi->bi_error = -EIO;
|
||||
if (!raid5_dec_bi_active_stripes(bi)) {
|
||||
md_write_end(conf->mddev);
|
||||
bi->bi_next = *return_bi;
|
||||
*return_bi = bi;
|
||||
bio_list_add(return_bi, bi);
|
||||
}
|
||||
bi = nextbi;
|
||||
}
|
||||
@ -3139,8 +3133,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bi->bi_error = -EIO;
|
||||
if (!raid5_dec_bi_active_stripes(bi)) {
|
||||
md_write_end(conf->mddev);
|
||||
bi->bi_next = *return_bi;
|
||||
*return_bi = bi;
|
||||
bio_list_add(return_bi, bi);
|
||||
}
|
||||
bi = bi2;
|
||||
}
|
||||
@ -3163,10 +3156,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
r5_next_bio(bi, sh->dev[i].sector);
|
||||
|
||||
bi->bi_error = -EIO;
|
||||
if (!raid5_dec_bi_active_stripes(bi)) {
|
||||
bi->bi_next = *return_bi;
|
||||
*return_bi = bi;
|
||||
}
|
||||
if (!raid5_dec_bi_active_stripes(bi))
|
||||
bio_list_add(return_bi, bi);
|
||||
bi = nextbi;
|
||||
}
|
||||
}
|
||||
@ -3445,7 +3436,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
* never LOCKED, so we don't need to test 'failed' directly.
|
||||
*/
|
||||
static void handle_stripe_clean_event(struct r5conf *conf,
|
||||
struct stripe_head *sh, int disks, struct bio **return_bi)
|
||||
struct stripe_head *sh, int disks, struct bio_list *return_bi)
|
||||
{
|
||||
int i;
|
||||
struct r5dev *dev;
|
||||
@ -3479,8 +3470,7 @@ returnbi:
|
||||
wbi2 = r5_next_bio(wbi, dev->sector);
|
||||
if (!raid5_dec_bi_active_stripes(wbi)) {
|
||||
md_write_end(conf->mddev);
|
||||
wbi->bi_next = *return_bi;
|
||||
*return_bi = wbi;
|
||||
bio_list_add(return_bi, wbi);
|
||||
}
|
||||
wbi = wbi2;
|
||||
}
|
||||
@ -4613,7 +4603,15 @@ finish:
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
}
|
||||
|
||||
return_io(s.return_bi);
|
||||
if (!bio_list_empty(&s.return_bi)) {
|
||||
if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
bio_list_merge(&conf->return_bi, &s.return_bi);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
} else
|
||||
return_io(&s.return_bi);
|
||||
}
|
||||
|
||||
clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
|
||||
}
|
||||
@ -4672,12 +4670,12 @@ static int raid5_congested(struct mddev *mddev, int bits)
|
||||
|
||||
static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
|
||||
unsigned int chunk_sectors = mddev->chunk_sectors;
|
||||
unsigned int chunk_sectors;
|
||||
unsigned int bio_sectors = bio_sectors(bio);
|
||||
|
||||
if (mddev->new_chunk_sectors < mddev->chunk_sectors)
|
||||
chunk_sectors = mddev->new_chunk_sectors;
|
||||
chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
|
||||
return chunk_sectors >=
|
||||
((sector & (chunk_sectors - 1)) + bio_sectors);
|
||||
}
|
||||
@ -5325,6 +5323,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
sector_t stripe_addr;
|
||||
int reshape_sectors;
|
||||
struct list_head stripes;
|
||||
sector_t retn;
|
||||
|
||||
if (sector_nr == 0) {
|
||||
/* If restarting in the middle, skip the initial sectors */
|
||||
@ -5332,6 +5331,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
|
||||
sector_nr = raid5_size(mddev, 0, 0)
|
||||
- conf->reshape_progress;
|
||||
} else if (mddev->reshape_backwards &&
|
||||
conf->reshape_progress == MaxSector) {
|
||||
/* shouldn't happen, but just in case, finish up.*/
|
||||
sector_nr = MaxSector;
|
||||
} else if (!mddev->reshape_backwards &&
|
||||
conf->reshape_progress > 0)
|
||||
sector_nr = conf->reshape_progress;
|
||||
@ -5340,7 +5343,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
mddev->curr_resync_completed = sector_nr;
|
||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||
*skipped = 1;
|
||||
return sector_nr;
|
||||
retn = sector_nr;
|
||||
goto finish;
|
||||
}
|
||||
}
|
||||
|
||||
@ -5348,10 +5352,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
* If old and new chunk sizes differ, we need to process the
|
||||
* largest of these
|
||||
*/
|
||||
if (mddev->new_chunk_sectors > mddev->chunk_sectors)
|
||||
reshape_sectors = mddev->new_chunk_sectors;
|
||||
else
|
||||
reshape_sectors = mddev->chunk_sectors;
|
||||
|
||||
reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
|
||||
|
||||
/* We update the metadata at least every 10 seconds, or when
|
||||
* the data about to be copied would over-write the source of
|
||||
@ -5366,11 +5368,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
safepos = conf->reshape_safe;
|
||||
sector_div(safepos, data_disks);
|
||||
if (mddev->reshape_backwards) {
|
||||
writepos -= min_t(sector_t, reshape_sectors, writepos);
|
||||
BUG_ON(writepos < reshape_sectors);
|
||||
writepos -= reshape_sectors;
|
||||
readpos += reshape_sectors;
|
||||
safepos += reshape_sectors;
|
||||
} else {
|
||||
writepos += reshape_sectors;
|
||||
/* readpos and safepos are worst-case calculations.
|
||||
* A negative number is overly pessimistic, and causes
|
||||
* obvious problems for unsigned storage. So clip to 0.
|
||||
*/
|
||||
readpos -= min_t(sector_t, reshape_sectors, readpos);
|
||||
safepos -= min_t(sector_t, reshape_sectors, safepos);
|
||||
}
|
||||
@ -5513,7 +5520,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
* then we need to write out the superblock.
|
||||
*/
|
||||
sector_nr += reshape_sectors;
|
||||
if ((sector_nr - mddev->curr_resync_completed) * 2
|
||||
retn = reshape_sectors;
|
||||
finish:
|
||||
if (mddev->curr_resync_completed > mddev->resync_max ||
|
||||
(sector_nr - mddev->curr_resync_completed) * 2
|
||||
>= mddev->resync_max - mddev->curr_resync_completed) {
|
||||
/* Cannot proceed until we've updated the superblock... */
|
||||
wait_event(conf->wait_for_overlap,
|
||||
@ -5538,7 +5548,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||
}
|
||||
ret:
|
||||
return reshape_sectors;
|
||||
return retn;
|
||||
}
|
||||
|
||||
static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
|
||||
@ -5794,6 +5804,18 @@ static void raid5d(struct md_thread *thread)
|
||||
|
||||
md_check_recovery(mddev);
|
||||
|
||||
if (!bio_list_empty(&conf->return_bi) &&
|
||||
!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
struct bio_list tmp = BIO_EMPTY_LIST;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
bio_list_merge(&tmp, &conf->return_bi);
|
||||
bio_list_init(&conf->return_bi);
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
return_io(&tmp);
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
handled = 0;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
@ -6234,8 +6256,8 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
|
||||
/* size is defined by the smallest of previous and new size */
|
||||
raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
|
||||
|
||||
sectors &= ~((sector_t)mddev->chunk_sectors - 1);
|
||||
sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
|
||||
sectors &= ~((sector_t)conf->chunk_sectors - 1);
|
||||
sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
|
||||
return sectors * (raid_disks - conf->max_degraded);
|
||||
}
|
||||
|
||||
@ -6453,6 +6475,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
INIT_LIST_HEAD(&conf->hold_list);
|
||||
INIT_LIST_HEAD(&conf->delayed_list);
|
||||
INIT_LIST_HEAD(&conf->bitmap_list);
|
||||
bio_list_init(&conf->return_bi);
|
||||
init_llist_head(&conf->released_stripes);
|
||||
atomic_set(&conf->active_stripes, 0);
|
||||
atomic_set(&conf->preread_active_stripes, 0);
|
||||
@ -6542,6 +6565,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (conf->reshape_progress != MaxSector) {
|
||||
conf->prev_chunk_sectors = mddev->chunk_sectors;
|
||||
conf->prev_algo = mddev->layout;
|
||||
} else {
|
||||
conf->prev_chunk_sectors = conf->chunk_sectors;
|
||||
conf->prev_algo = conf->algorithm;
|
||||
}
|
||||
|
||||
conf->min_nr_stripes = NR_STRIPES;
|
||||
@ -6661,6 +6687,8 @@ static int run(struct mddev *mddev)
|
||||
sector_t here_new, here_old;
|
||||
int old_disks;
|
||||
int max_degraded = (mddev->level == 6 ? 2 : 1);
|
||||
int chunk_sectors;
|
||||
int new_data_disks;
|
||||
|
||||
if (mddev->new_level != mddev->level) {
|
||||
printk(KERN_ERR "md/raid:%s: unsupported reshape "
|
||||
@ -6672,28 +6700,25 @@ static int run(struct mddev *mddev)
|
||||
/* reshape_position must be on a new-stripe boundary, and one
|
||||
* further up in new geometry must map after here in old
|
||||
* geometry.
|
||||
* If the chunk sizes are different, then as we perform reshape
|
||||
* in units of the largest of the two, reshape_position needs
|
||||
* be a multiple of the largest chunk size times new data disks.
|
||||
*/
|
||||
here_new = mddev->reshape_position;
|
||||
if (sector_div(here_new, mddev->new_chunk_sectors *
|
||||
(mddev->raid_disks - max_degraded))) {
|
||||
chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
|
||||
new_data_disks = mddev->raid_disks - max_degraded;
|
||||
if (sector_div(here_new, chunk_sectors * new_data_disks)) {
|
||||
printk(KERN_ERR "md/raid:%s: reshape_position not "
|
||||
"on a stripe boundary\n", mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
reshape_offset = here_new * mddev->new_chunk_sectors;
|
||||
reshape_offset = here_new * chunk_sectors;
|
||||
/* here_new is the stripe we will write to */
|
||||
here_old = mddev->reshape_position;
|
||||
sector_div(here_old, mddev->chunk_sectors *
|
||||
(old_disks-max_degraded));
|
||||
sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
|
||||
/* here_old is the first stripe that we might need to read
|
||||
* from */
|
||||
if (mddev->delta_disks == 0) {
|
||||
if ((here_new * mddev->new_chunk_sectors !=
|
||||
here_old * mddev->chunk_sectors)) {
|
||||
printk(KERN_ERR "md/raid:%s: reshape position is"
|
||||
" confused - aborting\n", mdname(mddev));
|
||||
return -EINVAL;
|
||||
}
|
||||
/* We cannot be sure it is safe to start an in-place
|
||||
* reshape. It is only safe if user-space is monitoring
|
||||
* and taking constant backups.
|
||||
@ -6712,10 +6737,10 @@ static int run(struct mddev *mddev)
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (mddev->reshape_backwards
|
||||
? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
|
||||
here_old * mddev->chunk_sectors)
|
||||
: (here_new * mddev->new_chunk_sectors >=
|
||||
here_old * mddev->chunk_sectors + (-min_offset_diff))) {
|
||||
? (here_new * chunk_sectors + min_offset_diff <=
|
||||
here_old * chunk_sectors)
|
||||
: (here_new * chunk_sectors >=
|
||||
here_old * chunk_sectors + (-min_offset_diff))) {
|
||||
/* Reading from the same stripe as writing to - bad */
|
||||
printk(KERN_ERR "md/raid:%s: reshape_position too early for "
|
||||
"auto-recovery - aborting.\n",
|
||||
@ -6967,7 +6992,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
|
||||
int i;
|
||||
|
||||
seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
|
||||
mddev->chunk_sectors / 2, mddev->layout);
|
||||
conf->chunk_sectors / 2, mddev->layout);
|
||||
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
|
||||
for (i = 0; i < conf->raid_disks; i++)
|
||||
seq_printf (seq, "%s",
|
||||
@ -7173,7 +7198,9 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
|
||||
* worth it.
|
||||
*/
|
||||
sector_t newsize;
|
||||
sectors &= ~((sector_t)mddev->chunk_sectors - 1);
|
||||
struct r5conf *conf = mddev->private;
|
||||
|
||||
sectors &= ~((sector_t)conf->chunk_sectors - 1);
|
||||
newsize = raid5_size(mddev, sectors, mddev->raid_disks);
|
||||
if (mddev->external_size &&
|
||||
mddev->array_sectors > newsize)
|
||||
@ -7412,6 +7439,7 @@ static void end_reshape(struct r5conf *conf)
|
||||
rdev->data_offset = rdev->new_data_offset;
|
||||
smp_wmb();
|
||||
conf->reshape_progress = MaxSector;
|
||||
conf->mddev->reshape_position = MaxSector;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
wake_up(&conf->wait_for_overlap);
|
||||
|
||||
|
@ -265,7 +265,7 @@ struct stripe_head_state {
|
||||
int dec_preread_active;
|
||||
unsigned long ops_request;
|
||||
|
||||
struct bio *return_bi;
|
||||
struct bio_list return_bi;
|
||||
struct md_rdev *blocked_rdev;
|
||||
int handle_bad_blocks;
|
||||
};
|
||||
@ -476,6 +476,9 @@ struct r5conf {
|
||||
int skip_copy; /* Don't copy data from bio to stripe cache */
|
||||
struct list_head *last_hold; /* detect hold_list promotions */
|
||||
|
||||
/* bios to have bi_end_io called after metadata is synced */
|
||||
struct bio_list return_bi;
|
||||
|
||||
atomic_t reshape_stripes; /* stripes with pending writes for reshape */
|
||||
/* unfortunately we need two cache names as we temporarily have
|
||||
* two caches.
|
||||
|
@ -40,9 +40,20 @@
|
||||
(unsigned long)bytes, ptrs); \
|
||||
kernel_neon_end(); \
|
||||
} \
|
||||
static void raid6_neon ## _n ## _xor_syndrome(int disks, \
|
||||
int start, int stop, \
|
||||
size_t bytes, void **ptrs) \
|
||||
{ \
|
||||
void raid6_neon ## _n ## _xor_syndrome_real(int, \
|
||||
int, int, unsigned long, void**); \
|
||||
kernel_neon_begin(); \
|
||||
raid6_neon ## _n ## _xor_syndrome_real(disks, \
|
||||
start, stop, (unsigned long)bytes, ptrs); \
|
||||
kernel_neon_end(); \
|
||||
} \
|
||||
struct raid6_calls const raid6_neonx ## _n = { \
|
||||
raid6_neon ## _n ## _gen_syndrome, \
|
||||
NULL, /* XOR not yet implemented */ \
|
||||
raid6_neon ## _n ## _xor_syndrome, \
|
||||
raid6_have_neon, \
|
||||
"neonx" #_n, \
|
||||
0 \
|
||||
|
@ -3,6 +3,7 @@
|
||||
* neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
|
||||
*
|
||||
* Copyright (C) 2012 Rob Herring
|
||||
* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* Based on altivec.uc:
|
||||
* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
|
||||
@ -78,3 +79,48 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
|
||||
vst1q_u8(&q[d+NSIZE*$$], wq$$);
|
||||
}
|
||||
}
|
||||
|
||||
void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
|
||||
unsigned long bytes, void **ptrs)
|
||||
{
|
||||
uint8_t **dptr = (uint8_t **)ptrs;
|
||||
uint8_t *p, *q;
|
||||
int d, z, z0;
|
||||
|
||||
register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
|
||||
const unative_t x1d = NBYTES(0x1d);
|
||||
|
||||
z0 = stop; /* P/Q right side optimization */
|
||||
p = dptr[disks-2]; /* XOR parity */
|
||||
q = dptr[disks-1]; /* RS syndrome */
|
||||
|
||||
for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
|
||||
wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
|
||||
wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);
|
||||
|
||||
/* P/Q data pages */
|
||||
for ( z = z0-1 ; z >= start ; z-- ) {
|
||||
wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
|
||||
wp$$ = veorq_u8(wp$$, wd$$);
|
||||
w2$$ = MASK(wq$$);
|
||||
w1$$ = SHLBYTE(wq$$);
|
||||
|
||||
w2$$ = vandq_u8(w2$$, x1d);
|
||||
w1$$ = veorq_u8(w1$$, w2$$);
|
||||
wq$$ = veorq_u8(w1$$, wd$$);
|
||||
}
|
||||
/* P/Q left side optimization */
|
||||
for ( z = start-1 ; z >= 0 ; z-- ) {
|
||||
w2$$ = MASK(wq$$);
|
||||
w1$$ = SHLBYTE(wq$$);
|
||||
|
||||
w2$$ = vandq_u8(w2$$, x1d);
|
||||
wq$$ = veorq_u8(w1$$, w2$$);
|
||||
}
|
||||
w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
|
||||
wq$$ = veorq_u8(wq$$, w1$$);
|
||||
|
||||
vst1q_u8(&p[d+NSIZE*$$], wp$$);
|
||||
vst1q_u8(&q[d+NSIZE*$$], wq$$);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user