md updates for 3.5
Main features:

 - RAID10 arrays can be reshaped - adding and removing devices and
   changing chunks (not 'far' arrays though)
 - allow RAID5 arrays to be reshaped with a backup file (not tested
   yet, but the principle works fine for RAID10)
 - arrays can be reshaped while a bitmap is present - you no longer
   need to remove it first
 - SSSE3 support for RAID6 syndrome calculations

and of course a number of minor fixes etc.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.18 (GNU/Linux)

iQIVAwUAT7xXijnsnt1WYoG5AQLvFg/+OGeptY2cRu3HpsNsibvIyfiOYSlDpLo+
2tYzBz2wFiFROfj41aV/PdeqE3xn/RelDmIgt9Apaimeg453O6IdjI9X00fPrgxV
ATWkwWy5ykozbLIsyJYQ/kLPo0NX2KR/TtEim2lwlEjs4bLsF8TGvRa6ylcko0zI
j6cbqVzkCDHXzLk/M6l0UoUaSG1PcjO6M10KBM7bS2sLoxhkn69gT7YTIlFySXW4
epNYSTKyeuSmEUI7L09s5HLf/zPZSp4MipoRIqQYcwk5gvmMNNuLbouDECvZ5BdV
TXxrVVSlh7tFSeoGwYXQXcv/nFg3n53Mc+Nimzo7hhmI5ytRR9Y0c6SwvRBCN7t6
HzapQu+vBqDIPzedH+6r/gk39Auzm60JjGDYHiSdjZCAWefcYUmYm/Iso9JJ/0hg
PVkSfnkgaFUx0GhXS+C9YgPHYlb5DnTCCMrbtQCL65D61D2det3oZtrQPfKIKMlw
SRz2Ls+4o4UhAY7JLYNhONa0mtxhk5VTZ3LH58I9+ZurVyvqrjvCV+neSiCUsRog
jT038/gT5nJ8HPsg5feQ9cS0TbEo92eg3gILy1D5cPTaMZhrV8gq0Ke7xgmBo0+Q
bWh4vxU9SM/96c/umCxcmHymKAFhsMVFbJTg4r9K5atFGNyMegJYedFFEEbQMQI3
u+KRDXHN700=
=q8bc
-----END PGP SIGNATURE-----

Merge tag 'md-3.5' of git://neil.brown.name/md

Pull md updates from NeilBrown:
 "It's been a busy cycle for md - lots of fun stuff here..  if you like
  this kind of thing :-)

  Main features:
   - RAID10 arrays can be reshaped - adding and removing devices and
     changing chunks (not 'far' arrays though)
   - allow RAID5 arrays to be reshaped with a backup file (not tested
     yet, but the principle works fine for RAID10)
   - arrays can be reshaped while a bitmap is present - you no longer
     need to remove it first
   - SSSE3 support for RAID6 syndrome calculations

  and of course a number of minor fixes etc."

* tag 'md-3.5' of git://neil.brown.name/md: (56 commits)
  md/bitmap: record the space available for the bitmap in the superblock.
  md/raid10: Remove extras after reshape to smaller number of devices.
  md/raid5: improve removal of extra devices after reshape.
  md: check the return of mddev_find()
  MD RAID1: Further conditionalize 'fullsync'
  DM RAID: Use md_error() in place of simply setting Faulty bit
  DM RAID: Record and handle missing devices
  DM RAID: Set recovery flags on resume
  md/raid5: Allow reshape while a bitmap is present.
  md/raid10: resize bitmap when required during reshape.
  md: allow array to be resized while bitmap is present.
  md/bitmap: make sure reshape request are reflected in superblock.
  md/bitmap: add bitmap_resize function to allow bitmap resizing.
  md/bitmap: use DIV_ROUND_UP instead of open-code
  md/bitmap: create a 'struct bitmap_counts' substructure of 'struct bitmap'
  md/bitmap: make bitmap bitops atomic.
  md/bitmap: make _page_attr bitops atomic.
  md/bitmap: merge bitmap_file_unmap and bitmap_file_put.
  md/bitmap: remove async freeing of bitmap file.
  md/bitmap: convert some spin_lock_irqsave to spin_lock_irq
  ...
This commit is contained in commit c80ddb5263
arch/x86/Makefile

@@ -115,9 +115,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
 
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
arch/x86/include/asm/xor_32.h

@@ -861,6 +861,9 @@ static struct xor_block_template xor_block_pIII_sse = {
 	.do_5 = xor_sse_5,
 };
 
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
 
@@ -871,6 +874,7 @@ do { \
 	xor_speed(&xor_block_8regs_p); \
 	xor_speed(&xor_block_32regs); \
 	xor_speed(&xor_block_32regs_p); \
+	AVX_XOR_SPEED; \
 	if (cpu_has_xmm) \
 		xor_speed(&xor_block_pIII_sse); \
 	if (cpu_has_mmx) { \
@@ -883,6 +887,6 @@ do { \
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
 #define XOR_SELECT_TEMPLATE(FASTEST) \
-	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+	AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
 
 #endif /* _ASM_X86_XOR_32_H */
arch/x86/include/asm/xor_64.h

@@ -347,15 +347,21 @@ static struct xor_block_template xor_block_sse = {
 	.do_5 = xor_sse_5,
 };
 
+
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES \
 do { \
+	AVX_XOR_SPEED; \
 	xor_speed(&xor_block_sse); \
 } while (0)
 
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	AVX_SELECT(&xor_block_sse)
 
 #endif /* _ASM_X86_XOR_64_H */
arch/x86/include/asm/xor_avx.h (new file, 214 lines)

@@ -0,0 +1,214 @@
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/i387.h>

#define ALIGN32 __aligned(32)

#define YMM_SAVED_REGS 4

#define YMMS_SAVE \
do { \
	preempt_disable(); \
	cr0 = read_cr0(); \
	clts(); \
	asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
	asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
	asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
	asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
} while (0);

#define YMMS_RESTORE \
do { \
	asm volatile("sfence" : : : "memory"); \
	asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
	asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
	asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
	asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
	write_cr0(cr0); \
	preempt_enable(); \
} while (0);

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	YMMS_RESTORE
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	YMMS_RESTORE
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16();

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	YMMS_RESTORE
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	YMMS_RESTORE
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
	if (cpu_has_avx) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(cpu_has_avx ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif
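Note on how this header is consumed: md never calls the xor_avx_* routines directly. They are registered as a xor_block_template and reached through the crypto layer's xor_blocks() helper, which dispatches to whichever template won the boot-time benchmark below. A minimal sketch of a caller - xor_blocks() is the real exported API, but the wrapper function is hypothetical:

#include <linux/raid/xor.h>
#include <linux/mm.h>

/* XOR one page of 'src' into 'dest' using the fastest registered
 * template (xor_block_avx when CONFIG_AS_AVX is set and the CPU
 * advertises AVX). */
static void xor_one_page(void *dest, void *src)
{
	void *srcs[1] = { src };

	xor_blocks(1, PAGE_SIZE, dest, srcs);
}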
crypto/xor.c (13 lines changed)
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/raid/xor.h>
 #include <linux/jiffies.h>
+#include <linux/preempt.h>
 #include <asm/xor.h>
 
 /* The xor routines to use.  */
@@ -63,12 +64,14 @@ static void
 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 {
 	int speed;
-	unsigned long now;
+	unsigned long now, j;
 	int i, count, max;
 
 	tmpl->next = template_list;
 	template_list = tmpl;
 
+	preempt_disable();
+
 	/*
 	 * Count the number of XORs done during a whole jiffy, and use
 	 * this to calculate the speed of checksumming.  We use a 2-page
@@ -76,9 +79,11 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 	 */
 	max = 0;
 	for (i = 0; i < 5; i++) {
-		now = jiffies;
+		j = jiffies;
 		count = 0;
-		while (jiffies == now) {
+		while ((now = jiffies) == j)
+			cpu_relax();
+		while (time_before(jiffies, now + 1)) {
 			mb(); /* prevent loop optimzation */
 			tmpl->do_2(BENCH_SIZE, b1, b2);
 			mb();
@@ -89,6 +94,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 			max = count;
 	}
 
+	preempt_enable();
+
 	speed = max * (HZ * BENCH_SIZE / 1024);
 	tmpl->speed = speed;
 
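The do_xor_speed() change above fixes the measurement window: instead of counting from an arbitrary point inside a jiffy, the loop first spins to a tick edge and only then counts calls for one whole tick, with preemption disabled throughout. The same idiom as a self-contained userspace sketch, assuming nothing from the kernel (time(2)'s one-second granularity stands in for jiffies; all names are illustrative):

#include <stdio.h>
#include <time.h>

static void work(void)
{
	/* stand-in for tmpl->do_2(BENCH_SIZE, b1, b2) */
}

static long ops_per_tick(void)
{
	time_t start = time(NULL), edge;
	long count = 0;

	while ((edge = time(NULL)) == start)
		;			/* align to a tick boundary */
	while (time(NULL) == edge) {	/* then measure one whole tick */
		work();
		count++;
	}
	return count;
}

int main(void)
{
	printf("%ld ops per tick\n", ops_per_tick());
	return 0;
}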
drivers/md/bitmap.c (1108 lines changed; diff suppressed because it is too large)
drivers/md/bitmap.h

@@ -111,9 +111,9 @@ typedef __u16 bitmap_counter_t;
 
 /* use these for bitmap->flags and bitmap->sb->state bit-fields */
 enum bitmap_state {
-	BITMAP_STALE = 0x002,  /* the bitmap file is out of date or had -EIO */
-	BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */
-	BITMAP_HOSTENDIAN = 0x8000,
+	BITMAP_STALE	   = 1,  /* the bitmap file is out of date or had -EIO */
+	BITMAP_WRITE_ERROR = 2, /* A write error has occurred */
+	BITMAP_HOSTENDIAN  =15,
 };
 
 /* the superblock at the front of the bitmap file -- little endian */
@@ -128,8 +128,10 @@ typedef struct bitmap_super_s {
 	__le32 chunksize;    /* 52  the bitmap chunk size in bytes */
 	__le32 daemon_sleep; /* 56  seconds between disk flushes */
 	__le32 write_behind; /* 60  number of outstanding write-behind writes */
+	__le32 sectors_reserved; /* 64 number of 512-byte sectors that are
+				  * reserved for the bitmap. */
 
-	__u8  pad[256 - 64]; /* set to zero */
+	__u8  pad[256 - 68]; /* set to zero */
 } bitmap_super_t;
 
 /* notes:
@@ -159,36 +161,49 @@ struct bitmap_page {
 	 * pointer and use it as two counters itself
 	 */
 	unsigned int hijacked:1;
+	/*
+	 * If any counter in this page is '1' or '2' - and so could be
+	 * cleared then that page is marked as 'pending'
+	 */
+	unsigned int pending:1;
 	/*
 	 * count of dirty bits on the page
 	 */
-	unsigned int count:31;
+	unsigned int count:30;
 };
 
 /* the main bitmap structure - one per mddev */
 struct bitmap {
-	struct bitmap_page *bp;
-	unsigned long pages; /* total number of pages in the bitmap */
-	unsigned long missing_pages; /* number of pages not yet allocated */
+	struct bitmap_counts {
+		spinlock_t lock;
+		struct bitmap_page *bp;
+		unsigned long pages;		/* total number of pages
+						 * in the bitmap */
+		unsigned long missing_pages;	/* number of pages
+						 * not yet allocated */
+		unsigned long chunkshift;	/* chunksize = 2^chunkshift
+						 * (for bitops) */
+		unsigned long chunks;		/* Total number of data
+						 * chunks for the array */
+	} counts;
 
 	struct mddev *mddev; /* the md device that the bitmap is for */
 
-	/* bitmap chunksize -- how much data does each bit represent? */
-	unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */
-	unsigned long chunks; /* total number of data chunks for the array */
-
 	__u64	events_cleared;
 	int need_sync;
 
-	/* bitmap spinlock */
-	spinlock_t lock;
-
-	struct file *file; /* backing disk file */
-	struct page *sb_page; /* cached copy of the bitmap file superblock */
-	struct page **filemap; /* list of cache pages for the file */
-	unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
-	unsigned long file_pages; /* number of pages in the file */
-	int last_page_size; /* bytes in the last page */
+	struct bitmap_storage {
+		struct file *file;		/* backing disk file */
+		struct page *sb_page;		/* cached copy of the bitmap
+						 * file superblock */
+		struct page **filemap;		/* list of cache pages for
+						 * the file */
+		unsigned long *filemap_attr;	/* attributes associated
+						 * w/ filemap pages */
+		unsigned long file_pages;	/* number of pages in the file*/
+		unsigned long bytes;		/* total bytes in the bitmap */
+	} storage;
 
 	unsigned long flags;
 
@@ -242,6 +257,9 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
 
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct mddev *mddev);
+
+int  bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+		   int chunksize, int init);
 #endif
 
 #endif
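One detail of the bitmap.h hunk worth spelling out: 'count' shrinks from 31 to 30 bits because the new 'pending' flag has to fit in the same 32-bit word as 'hijacked'. A standalone sketch of the packing (the struct name here is illustrative, not the kernel's):

#include <stdio.h>

struct bitmap_page_bits {
	unsigned int hijacked:1;
	unsigned int pending:1;
	unsigned int count:30;	/* 1 + 1 + 30 = 32 bits */
};

int main(void)
{
	/* all three fields still share a single 4-byte word */
	printf("%zu\n", sizeof(struct bitmap_page_bits));
	return 0;
}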
drivers/md/dm-raid.c

@@ -155,10 +155,7 @@ static void context_free(struct raid_set *rs)
 	for (i = 0; i < rs->md.raid_disks; i++) {
 		if (rs->dev[i].meta_dev)
 			dm_put_device(rs->ti, rs->dev[i].meta_dev);
-		if (rs->dev[i].rdev.sb_page)
-			put_page(rs->dev[i].rdev.sb_page);
-		rs->dev[i].rdev.sb_page = NULL;
-		rs->dev[i].rdev.sb_loaded = 0;
+		md_rdev_clear(&rs->dev[i].rdev);
 		if (rs->dev[i].data_dev)
 			dm_put_device(rs->ti, rs->dev[i].data_dev);
 	}
@@ -606,7 +603,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
 		DMERR("Failed to read superblock of device at position %d",
 		      rdev->raid_disk);
-		set_bit(Faulty, &rdev->flags);
+		md_error(rdev->mddev, rdev);
 		return -EINVAL;
 	}
 
@@ -617,16 +614,18 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 
 static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 {
-	struct md_rdev *r;
+	int i;
 	uint64_t failed_devices;
 	struct dm_raid_superblock *sb;
+	struct raid_set *rs = container_of(mddev, struct raid_set, md);
 
 	sb = page_address(rdev->sb_page);
 	failed_devices = le64_to_cpu(sb->failed_devices);
 
-	rdev_for_each(r, mddev)
-		if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
-			failed_devices |= (1ULL << r->raid_disk);
+	for (i = 0; i < mddev->raid_disks; i++)
+		if (!rs->dev[i].data_dev ||
+		    test_bit(Faulty, &(rs->dev[i].rdev.flags)))
+			failed_devices |= (1ULL << i);
 
 	memset(sb, 0, sizeof(*sb));
 
@@ -1252,12 +1251,13 @@ static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
+	set_bit(MD_CHANGE_DEVS, &rs->md.flags);
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
 		rs->bitmap_loaded = 1;
-	}
+	} else
+		md_wakeup_thread(rs->md.thread);
 
 	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
 	mddev_resume(&rs->md);
 }
drivers/md/md.c (370 lines changed)
@@ -402,6 +402,7 @@ void mddev_resume(struct mddev *mddev)
 	wake_up(&mddev->sb_wait);
 	mddev->pers->quiesce(mddev, 0);
 
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 }
@@ -452,7 +453,7 @@ static void submit_flushes(struct work_struct *ws)
 			atomic_inc(&rdev->nr_pending);
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
-			bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
+			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
 			bi->bi_bdev = rdev->bdev;
@@ -607,6 +608,7 @@ void mddev_init(struct mddev *mddev)
 	init_waitqueue_head(&mddev->sb_wait);
 	init_waitqueue_head(&mddev->recovery_wait);
 	mddev->reshape_position = MaxSector;
+	mddev->reshape_backwards = 0;
 	mddev->resync_min = 0;
 	mddev->resync_max = MaxSector;
 	mddev->level = LEVEL_NONE;
@@ -802,7 +804,7 @@ static int alloc_disk_sb(struct md_rdev * rdev)
 	return 0;
 }
 
-static void free_disk_sb(struct md_rdev * rdev)
+void md_rdev_clear(struct md_rdev *rdev)
 {
 	if (rdev->sb_page) {
 		put_page(rdev->sb_page);
@@ -815,8 +817,10 @@ static void free_disk_sb(struct md_rdev * rdev)
 		put_page(rdev->bb_page);
 		rdev->bb_page = NULL;
 	}
+	kfree(rdev->badblocks.page);
+	rdev->badblocks.page = NULL;
 }
+EXPORT_SYMBOL_GPL(md_rdev_clear);
 
 static void super_written(struct bio *bio, int error)
 {
@@ -887,6 +891,10 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 		rdev->meta_bdev : rdev->bdev;
 	if (metadata_op)
 		bio->bi_sector = sector + rdev->sb_start;
+	else if (rdev->mddev->reshape_position != MaxSector &&
+		 (rdev->mddev->reshape_backwards ==
+		  (sector >= rdev->mddev->reshape_position)))
+		bio->bi_sector = sector + rdev->new_data_offset;
 	else
 		bio->bi_sector = sector + rdev->data_offset;
 	bio_add_page(bio, page, size, 0);
@@ -1034,12 +1042,17 @@ static unsigned int calc_sb_csum(mdp_super_t * sb)
 struct super_type  {
 	char		    *name;
 	struct module	    *owner;
-	int		    (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev,
+	int		    (*load_super)(struct md_rdev *rdev,
+					  struct md_rdev *refdev,
 					  int minor_version);
-	int		    (*validate_super)(struct mddev *mddev, struct md_rdev *rdev);
-	void		    (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
+	int		    (*validate_super)(struct mddev *mddev,
+					      struct md_rdev *rdev);
+	void		    (*sync_super)(struct mddev *mddev,
+					  struct md_rdev *rdev);
 	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
 						sector_t num_sectors);
+	int		    (*allow_new_offset)(struct md_rdev *rdev,
+						unsigned long long new_offset);
 };
 
 /*
@@ -1111,6 +1124,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 
 	rdev->preferred_minor = sb->md_minor;
 	rdev->data_offset = 0;
+	rdev->new_data_offset = 0;
 	rdev->sb_size = MD_SB_BYTES;
 	rdev->badblocks.shift = -1;
 
@@ -1184,7 +1198,11 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->dev_sectors = ((sector_t)sb->size) * 2;
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
+		mddev->bitmap_info.space = 0;
+		/* bitmap can use 60 K after the 4K superblocks */
 		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
+		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
+		mddev->reshape_backwards = 0;
 
 		if (mddev->minor_version >= 91) {
 			mddev->reshape_position = sb->reshape_position;
@@ -1192,6 +1210,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_level = sb->new_level;
 			mddev->new_layout = sb->new_layout;
 			mddev->new_chunk_sectors = sb->new_chunk >> 9;
+			if (mddev->delta_disks < 0)
+				mddev->reshape_backwards = 1;
 		} else {
 			mddev->reshape_position = MaxSector;
 			mddev->delta_disks = 0;
@@ -1218,9 +1238,12 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->max_disks = MD_SB_DISKS;
 
 		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
-		    mddev->bitmap_info.file == NULL)
+		    mddev->bitmap_info.file == NULL) {
 			mddev->bitmap_info.offset =
 				mddev->bitmap_info.default_offset;
+			mddev->bitmap_info.space =
+				mddev->bitmap_info.default_space;
+		}
 
 	} else if (mddev->pers == NULL) {
 		/* Insist on good event counter while assembling, except
@@ -1434,6 +1457,12 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
 	return num_sectors;
 }
 
+static int
+super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
+{
+	/* non-zero offset changes not possible with v0.90 */
+	return new_offset == 0;
+}
 
 /*
  * version 1 superblock
@@ -1469,6 +1498,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 	struct mdp_superblock_1 *sb;
 	int ret;
 	sector_t sb_start;
+	sector_t sectors;
 	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 	int bmask;
 
@@ -1523,9 +1553,18 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 		       bdevname(rdev->bdev,b));
 		return -EINVAL;
 	}
+	if (sb->pad0 ||
+	    sb->pad3[0] ||
+	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
+		/* Some padding is non-zero, might be a new feature */
+		return -EINVAL;
 
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
+	rdev->new_data_offset = rdev->data_offset;
+	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
+	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
+		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
 	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
 
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
@@ -1536,6 +1575,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 	if (minor_version
 	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
 		return -EINVAL;
+	if (minor_version
+	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
+		return -EINVAL;
 
 	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
 		rdev->desc_nr = -1;
@@ -1607,16 +1649,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 		else
 			ret = 0;
 	}
-	if (minor_version)
-		rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
-			le64_to_cpu(sb->data_offset);
-	else
-		rdev->sectors = rdev->sb_start;
-	if (rdev->sectors < le64_to_cpu(sb->data_size))
+	if (minor_version) {
+		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
+		sectors -= rdev->data_offset;
+	} else
+		sectors = rdev->sb_start;
+	if (sectors < le64_to_cpu(sb->data_size))
 		return -EINVAL;
 	rdev->sectors = le64_to_cpu(sb->data_size);
 	if (le64_to_cpu(sb->size) > rdev->sectors)
 		return -EINVAL;
 	return ret;
 }
@@ -1644,17 +1684,37 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->dev_sectors = le64_to_cpu(sb->size);
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
+		mddev->bitmap_info.space = 0;
+		/* Default location for bitmap is 1K after superblock
+		 * using 3K - total of 4K
+		 */
 		mddev->bitmap_info.default_offset = 1024 >> 9;
+		mddev->bitmap_info.default_space = (4096-1024) >> 9;
+		mddev->reshape_backwards = 0;
 
 		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
 		memcpy(mddev->uuid, sb->set_uuid, 16);
 
 		mddev->max_disks = (4096-256)/2;
 
 		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
-		    mddev->bitmap_info.file == NULL )
+		    mddev->bitmap_info.file == NULL) {
 			mddev->bitmap_info.offset =
 				(__s32)le32_to_cpu(sb->bitmap_offset);
+			/* Metadata doesn't record how much space is available.
+			 * For 1.0, we assume we can use up to the superblock
+			 * if before, else to 4K beyond superblock.
+			 * For others, assume no change is possible.
+			 */
+			if (mddev->minor_version > 0)
+				mddev->bitmap_info.space = 0;
+			else if (mddev->bitmap_info.offset > 0)
+				mddev->bitmap_info.space =
+					8 - mddev->bitmap_info.offset;
+			else
+				mddev->bitmap_info.space =
+					-mddev->bitmap_info.offset;
+		}
 
 		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
 			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
@@ -1662,6 +1722,11 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_level = le32_to_cpu(sb->new_level);
 			mddev->new_layout = le32_to_cpu(sb->new_layout);
 			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
+			if (mddev->delta_disks < 0 ||
+			    (mddev->delta_disks == 0 &&
+			     (le32_to_cpu(sb->feature_map)
+			      & MD_FEATURE_RESHAPE_BACKWARDS)))
+				mddev->reshape_backwards = 1;
 		} else {
 			mddev->reshape_position = MaxSector;
 			mddev->delta_disks = 0;
@@ -1735,7 +1800,6 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->feature_map = 0;
 	sb->pad0 = 0;
 	sb->recovery_offset = cpu_to_le64(0);
-	memset(sb->pad1, 0, sizeof(sb->pad1));
 	memset(sb->pad3, 0, sizeof(sb->pad3));
 
 	sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1757,6 +1821,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 		sb->devflags |= WriteMostly1;
 	else
 		sb->devflags &= ~WriteMostly1;
+	sb->data_offset = cpu_to_le64(rdev->data_offset);
+	sb->data_size = cpu_to_le64(rdev->sectors);
 
 	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
@@ -1781,6 +1847,16 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
 		sb->new_level = cpu_to_le32(mddev->new_level);
 		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
+		if (mddev->delta_disks == 0 &&
+		    mddev->reshape_backwards)
+			sb->feature_map
+				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
+		if (rdev->new_data_offset != rdev->data_offset) {
+			sb->feature_map
+				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
+			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
+							     - rdev->data_offset));
+		}
 	}
 
 	if (rdev->badblocks.count == 0)
@@ -1857,6 +1933,8 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
 	sector_t max_sectors;
 	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
 		return 0; /* component must fit device */
+	if (rdev->data_offset != rdev->new_data_offset)
+		return 0; /* too confusing */
 	if (rdev->sb_start < rdev->data_offset) {
 		/* minor versions 1 and 2; superblock before data */
 		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
@@ -1884,6 +1962,40 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);
 	return num_sectors;
+
 }
+
+static int
+super_1_allow_new_offset(struct md_rdev *rdev,
+			 unsigned long long new_offset)
+{
+	/* All necessary checks on new >= old have been done */
+	struct bitmap *bitmap;
+	if (new_offset >= rdev->data_offset)
+		return 1;
+
+	/* with 1.0 metadata, there is no metadata to tread on
+	 * so we can always move back */
+	if (rdev->mddev->minor_version == 0)
+		return 1;
+
+	/* otherwise we must be sure not to step on
+	 * any metadata, so stay:
+	 * 36K beyond start of superblock
+	 * beyond end of badblocks
+	 * beyond write-intent bitmap
+	 */
+	if (rdev->sb_start + (32+4)*2 > new_offset)
+		return 0;
+	bitmap = rdev->mddev->bitmap;
+	if (bitmap && !rdev->mddev->bitmap_info.file &&
+	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
+	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
+		return 0;
+	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
+		return 0;
+
+	return 1;
+}
@@ -1894,6 +2006,7 @@ static struct super_type super_types[] = {
 		.validate_super	    = super_90_validate,
 		.sync_super	    = super_90_sync,
 		.rdev_size_change   = super_90_rdev_size_change,
+		.allow_new_offset   = super_90_allow_new_offset,
 	},
 	[1] = {
 		.name	= "md-1",
@@ -1902,6 +2015,7 @@ static struct super_type super_types[] = {
 		.validate_super	    = super_1_validate,
 		.sync_super	    = super_1_sync,
 		.rdev_size_change   = super_1_rdev_size_change,
+		.allow_new_offset   = super_1_allow_new_offset,
 	},
 };
 
@@ -2105,9 +2219,7 @@ static void unbind_rdev_from_array(struct md_rdev * rdev)
 	sysfs_remove_link(&rdev->kobj, "block");
 	sysfs_put(rdev->sysfs_state);
 	rdev->sysfs_state = NULL;
-	kfree(rdev->badblocks.page);
 	rdev->badblocks.count = 0;
-	rdev->badblocks.page = NULL;
 	/* We need to delay this, otherwise we can deadlock when
 	 * writing to 'remove' to "dev/state". We also need
 	 * to delay it due to rcu usage.
@@ -2158,7 +2270,7 @@ static void export_rdev(struct md_rdev * rdev)
 		bdevname(rdev->bdev,b));
 	if (rdev->mddev)
 		MD_BUG();
-	free_disk_sb(rdev);
+	md_rdev_clear(rdev);
 #ifndef MODULE
 	if (test_bit(AutoDetected, &rdev->flags))
 		md_autodetect_dev(rdev->bdev->bd_dev);
@@ -2809,9 +2921,8 @@ offset_show(struct md_rdev *rdev, char *page)
 static ssize_t
 offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
-	char *e;
-	unsigned long long offset = simple_strtoull(buf, &e, 10);
-	if (e==buf || (*e && *e != '\n'))
+	unsigned long long offset;
+	if (strict_strtoull(buf, 10, &offset) < 0)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
@@ -2826,6 +2937,63 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 static struct rdev_sysfs_entry rdev_offset =
 __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
 
+static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
+{
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)rdev->new_data_offset);
+}
+
+static ssize_t new_offset_store(struct md_rdev *rdev,
+				const char *buf, size_t len)
+{
+	unsigned long long new_offset;
+	struct mddev *mddev = rdev->mddev;
+
+	if (strict_strtoull(buf, 10, &new_offset) < 0)
+		return -EINVAL;
+
+	if (mddev->sync_thread)
+		return -EBUSY;
+	if (new_offset == rdev->data_offset)
+		/* reset is always permitted */
+		;
+	else if (new_offset > rdev->data_offset) {
+		/* must not push array size beyond rdev_sectors */
+		if (new_offset - rdev->data_offset
+		    + mddev->dev_sectors > rdev->sectors)
+			return -E2BIG;
+	}
+	/* Metadata worries about other space details. */
+
+	/* decreasing the offset is inconsistent with a backwards
+	 * reshape.
+	 */
+	if (new_offset < rdev->data_offset &&
+	    mddev->reshape_backwards)
+		return -EINVAL;
+	/* Increasing offset is inconsistent with forwards
+	 * reshape.  reshape_direction should be set to
+	 * 'backwards' first.
+	 */
+	if (new_offset > rdev->data_offset &&
+	    !mddev->reshape_backwards)
+		return -EINVAL;
+
+	if (mddev->pers && mddev->persistent &&
+	    !super_types[mddev->major_version]
+	    .allow_new_offset(rdev, new_offset))
+		return -E2BIG;
+	rdev->new_data_offset = new_offset;
+	if (new_offset > rdev->data_offset)
+		mddev->reshape_backwards = 1;
+	else if (new_offset < rdev->data_offset)
+		mddev->reshape_backwards = 0;
+
+	return len;
+}
+static struct rdev_sysfs_entry rdev_new_offset =
+__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
+
 static ssize_t
 rdev_size_show(struct md_rdev *rdev, char *page)
 {
@@ -2870,6 +3038,8 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
 
 	if (strict_blocks_to_sectors(buf, &sectors) < 0)
 		return -EINVAL;
+	if (rdev->data_offset != rdev->new_data_offset)
+		return -EINVAL; /* too confusing */
 	if (my_mddev->pers && rdev->raid_disk >= 0) {
 		if (my_mddev->persistent) {
 			sectors = super_types[my_mddev->major_version].
@@ -3006,6 +3176,7 @@ static struct attribute *rdev_default_attrs[] = {
 	&rdev_errors.attr,
 	&rdev_slot.attr,
 	&rdev_offset.attr,
+	&rdev_new_offset.attr,
 	&rdev_size.attr,
 	&rdev_recovery_start.attr,
 	&rdev_bad_blocks.attr,
@@ -3080,6 +3251,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	rdev->raid_disk = -1;
 	rdev->flags = 0;
 	rdev->data_offset = 0;
+	rdev->new_data_offset = 0;
 	rdev->sb_events = 0;
 	rdev->last_read_error.tv_sec = 0;
 	rdev->last_read_error.tv_nsec = 0;
@@ -3178,8 +3350,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
 abort_free:
 	if (rdev->bdev)
 		unlock_rdev(rdev);
-	free_disk_sb(rdev);
-	kfree(rdev->badblocks.page);
+	md_rdev_clear(rdev);
 	kfree(rdev);
 	return ERR_PTR(err);
 }
@@ -3419,6 +3590,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->new_chunk_sectors = mddev->chunk_sectors;
 		mddev->raid_disks -= mddev->delta_disks;
 		mddev->delta_disks = 0;
+		mddev->reshape_backwards = 0;
 		module_put(pers->owner);
 		printk(KERN_WARNING "md: %s: %s would not accept array\n",
 		       mdname(mddev), clevel);
@@ -3492,6 +3664,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	mddev->layout = mddev->new_layout;
 	mddev->chunk_sectors = mddev->new_chunk_sectors;
 	mddev->delta_disks = 0;
+	mddev->reshape_backwards = 0;
 	mddev->degraded = 0;
 	if (mddev->pers->sync_request == NULL) {
 		/* this is now an array without redundancy, so
@@ -3501,10 +3674,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		del_timer_sync(&mddev->safemode_timer);
 	}
 	pers->run(mddev);
-	mddev_resume(mddev);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
+	mddev_resume(mddev);
 	sysfs_notify(&mddev->kobj, NULL, "level");
 	md_new_event(mddev);
 	return rv;
@@ -3582,9 +3753,20 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
 	if (mddev->pers)
 		rv = update_raid_disks(mddev, n);
 	else if (mddev->reshape_position != MaxSector) {
+		struct md_rdev *rdev;
 		int olddisks = mddev->raid_disks - mddev->delta_disks;
+
+		rdev_for_each(rdev, mddev) {
+			if (olddisks < n &&
+			    rdev->data_offset < rdev->new_data_offset)
+				return -EINVAL;
+			if (olddisks > n &&
+			    rdev->data_offset > rdev->new_data_offset)
+				return -EINVAL;
+		}
 		mddev->delta_disks = n - olddisks;
 		mddev->raid_disks = n;
+		mddev->reshape_backwards = (mddev->delta_disks < 0);
 	} else
 		mddev->raid_disks = n;
 	return rv ? rv : len;
@@ -4266,7 +4448,8 @@ sync_completed_show(struct mddev *mddev, char *page)
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return sprintf(page, "none\n");
 
-	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->resync_max_sectors;
 	else
 		max_sectors = mddev->dev_sectors;
@@ -4428,6 +4611,7 @@ reshape_position_show(struct mddev *mddev, char *page)
 static ssize_t
 reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
 {
+	struct md_rdev *rdev;
 	char *e;
 	unsigned long long new = simple_strtoull(buf, &e, 10);
 	if (mddev->pers)
@@ -4436,9 +4620,12 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
 		return -EINVAL;
 	mddev->reshape_position = new;
 	mddev->delta_disks = 0;
+	mddev->reshape_backwards = 0;
 	mddev->new_level = mddev->level;
 	mddev->new_layout = mddev->layout;
 	mddev->new_chunk_sectors = mddev->chunk_sectors;
+	rdev_for_each(rdev, mddev)
+		rdev->new_data_offset = rdev->data_offset;
 	return len;
 }
 
@@ -4446,6 +4633,42 @@ static struct md_sysfs_entry md_reshape_position =
 __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
        reshape_position_store);
 
+static ssize_t
+reshape_direction_show(struct mddev *mddev, char *page)
+{
+	return sprintf(page, "%s\n",
+		       mddev->reshape_backwards ? "backwards" : "forwards");
+}
+
+static ssize_t
+reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
+{
+	int backwards = 0;
+	if (cmd_match(buf, "forwards"))
+		backwards = 0;
+	else if (cmd_match(buf, "backwards"))
+		backwards = 1;
+	else
+		return -EINVAL;
+	if (mddev->reshape_backwards == backwards)
+		return len;
+
+	/* check if we are allowed to change */
+	if (mddev->delta_disks)
+		return -EBUSY;
+
+	if (mddev->persistent &&
+	    mddev->major_version == 0)
+		return -EINVAL;
+
+	mddev->reshape_backwards = backwards;
+	return len;
+}
+
+static struct md_sysfs_entry md_reshape_direction =
+__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
+       reshape_direction_store);
+
 static ssize_t
 array_size_show(struct mddev *mddev, char *page)
 {
@@ -4501,6 +4724,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_safe_delay.attr,
 	&md_array_state.attr,
 	&md_reshape_position.attr,
+	&md_reshape_direction.attr,
 	&md_array_size.attr,
 	&max_corr_read_errors.attr,
 	NULL,
@@ -4914,7 +5138,8 @@ int md_run(struct mddev *mddev)
 			err = -EINVAL;
 			mddev->pers->stop(mddev);
 		}
-	if (err == 0 && mddev->pers->sync_request) {
+	if (err == 0 && mddev->pers->sync_request &&
+	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
 		err = bitmap_create(mddev);
 		if (err) {
 			printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
@@ -5064,6 +5289,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->events = 0;
 	mddev->can_decrease_events = 0;
 	mddev->delta_disks = 0;
+	mddev->reshape_backwards = 0;
 	mddev->new_level = LEVEL_NONE;
 	mddev->new_layout = 0;
 	mddev->new_chunk_sectors = 0;
@@ -5079,6 +5305,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->merge_check_needed = 0;
 	mddev->bitmap_info.offset = 0;
 	mddev->bitmap_info.default_offset = 0;
+	mddev->bitmap_info.default_space = 0;
 	mddev->bitmap_info.chunksize = 0;
 	mddev->bitmap_info.daemon_sleep = 0;
 	mddev->bitmap_info.max_write_behind = 0;
@@ -5421,7 +5648,7 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg)
 		goto out;
 
 	/* bitmap disabled, zero the first byte and copy out */
-	if (!mddev->bitmap || !mddev->bitmap->file) {
+	if (!mddev->bitmap || !mddev->bitmap->storage.file) {
 		file->pathname[0] = '\0';
 		goto copy_out;
 	}
@@ -5430,7 +5657,8 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg)
 	if (!buf)
 		goto out;
 
-	ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+	ptr = d_path(&mddev->bitmap->storage.file->f_path,
+		     buf, sizeof(file->pathname));
 	if (IS_ERR(ptr))
 		goto out;
 
@@ -5875,6 +6103,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
+		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
 		mddev->bitmap_info.offset = 0;
 
 		mddev->reshape_position = MaxSector;
@@ -5888,6 +6117,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
 	mddev->new_chunk_sectors = mddev->chunk_sectors;
 	mddev->new_layout = mddev->layout;
 	mddev->delta_disks = 0;
+	mddev->reshape_backwards = 0;
 
 	return 0;
 }
@@ -5922,11 +6152,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
-	if (mddev->bitmap)
-		/* Sorry, cannot grow a bitmap yet, just remove it,
-		 * grow, and re-add.
-		 */
-		return -EBUSY;
+
 	rdev_for_each(rdev, mddev) {
 		sector_t avail = rdev->sectors;
 
@@ -5944,6 +6170,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 static int update_raid_disks(struct mddev *mddev, int raid_disks)
 {
 	int rv;
+	struct md_rdev *rdev;
 	/* change the number of raid disks */
 	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
@@ -5952,11 +6179,27 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
 		return -EINVAL;
 	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
+
+	rdev_for_each(rdev, mddev) {
+		if (mddev->raid_disks < raid_disks &&
+		    rdev->data_offset < rdev->new_data_offset)
+			return -EINVAL;
+		if (mddev->raid_disks > raid_disks &&
+		    rdev->data_offset > rdev->new_data_offset)
+			return -EINVAL;
+	}
+
 	mddev->delta_disks = raid_disks - mddev->raid_disks;
+	if (mddev->delta_disks < 0)
+		mddev->reshape_backwards = 1;
+	else if (mddev->delta_disks > 0)
+		mddev->reshape_backwards = 0;
 
 	rv = mddev->pers->check_reshape(mddev);
-	if (rv < 0)
+	if (rv < 0) {
 		mddev->delta_disks = 0;
+		mddev->reshape_backwards = 0;
+	}
 	return rv;
 }
@@ -6039,6 +6282,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				return -EINVAL;
 			mddev->bitmap_info.offset =
 				mddev->bitmap_info.default_offset;
+			mddev->bitmap_info.space =
+				mddev->bitmap_info.default_space;
 			mddev->pers->quiesce(mddev, 1);
 			rv = bitmap_create(mddev);
 			if (!rv)
@@ -6050,7 +6295,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 			/* remove the bitmap */
 			if (!mddev->bitmap)
 				return -ENOENT;
-			if (mddev->bitmap->file)
+			if (mddev->bitmap->storage.file)
 				return -EINVAL;
 			mddev->pers->quiesce(mddev, 1);
 			bitmap_destroy(mddev);
@@ -6373,6 +6618,9 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 	struct mddev *mddev = mddev_find(bdev->bd_dev);
 	int err;
 
+	if (!mddev)
+		return -ENODEV;
+
 	if (mddev->gendisk != bdev->bd_disk) {
 		/* we are racing with mddev_put which is discarding this
 		 * bd_disk.
@@ -6584,7 +6832,8 @@ static void status_resync(struct seq_file *seq, struct mddev * mddev)
 
 	resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
 
-	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->resync_max_sectors;
 	else
 		max_sectors = mddev->dev_sectors;
@@ -7147,7 +7396,7 @@ void md_do_sync(struct mddev *mddev)
 			j = mddev->recovery_cp;
 
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		max_sectors = mddev->dev_sectors;
+		max_sectors = mddev->resync_max_sectors;
 	else {
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->dev_sectors;
@@ -7598,7 +7847,7 @@ void md_check_recovery(struct mddev *mddev)
 			goto unlock;
 
 		if (mddev->pers->sync_request) {
-			if (spares && mddev->bitmap && ! mddev->bitmap->file) {
+			if (spares) {
 				/* We are adding a device or devices to an array
 				 * which has the bitmap stored on all devices.
 				 * So make sure all bitmap pages get written
@@ -7646,6 +7895,20 @@ void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_wait_for_blocked_rdev);
 
+void md_finish_reshape(struct mddev *mddev)
+{
+	/* called be personality module when reshape completes. */
+	struct md_rdev *rdev;
+
+	rdev_for_each(rdev, mddev) {
+		if (rdev->data_offset > rdev->new_data_offset)
+			rdev->sectors += rdev->data_offset - rdev->new_data_offset;
+		else
+			rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
+		rdev->data_offset = rdev->new_data_offset;
+	}
+}
+EXPORT_SYMBOL(md_finish_reshape);
+
 /* Bad block management.
  * We can record which blocks on each device are 'bad' and so just
@@ -7894,10 +8157,15 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
 }
 
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
-		       int acknowledged)
+		       int is_new)
 {
-	int rv = md_set_badblocks(&rdev->badblocks,
-				  s + rdev->data_offset, sectors, acknowledged);
+	int rv;
+	if (is_new)
+		s += rdev->new_data_offset;
+	else
+		s += rdev->data_offset;
+	rv = md_set_badblocks(&rdev->badblocks,
+			      s, sectors, 0);
 	if (rv) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8003,11 +8271,15 @@ out:
 	return rv;
 }
 
-int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors)
+int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+			 int is_new)
 {
+	if (is_new)
+		s += rdev->new_data_offset;
+	else
+		s += rdev->data_offset;
 	return md_clear_badblocks(&rdev->badblocks,
-				  s + rdev->data_offset,
-				  sectors);
+				  s, sectors);
 }
 EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
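The new rdev attribute 'new_offset' and the array attribute 'reshape_direction' added above are driven from userspace through sysfs writes. A hypothetical sketch of requesting a backwards reshape (the sysfs path assumes an array named md0; per reshape_direction_store() above, the write is rejected once delta_disks is nonzero, so the direction must be set first):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/md0/md/reshape_direction", "w");

	if (!f)
		return 1;
	fputs("backwards\n", f);
	fclose(f);
	return 0;
}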
drivers/md/md.h

@@ -55,6 +55,7 @@ struct md_rdev {
 	int sb_loaded;
 	__u64 sb_events;
 	sector_t data_offset;	/* start of data in array */
+	sector_t new_data_offset;/* only relevant while reshaping */
 	sector_t sb_start;	/* offset of the super block (in 512byte sectors) */
 	int sb_size;		/* bytes in the superblock */
 	int preferred_minor;	/* autorun support */
@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
 	return 0;
 }
 extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
-			      int acknowledged);
-extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors);
+			      int is_new);
+extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+				int is_new);
 extern void md_ack_all_badblocks(struct badblocks *bb);
 
 struct mddev {
@@ -262,6 +264,7 @@ struct mddev {
 	sector_t			reshape_position;
 	int				delta_disks, new_level, new_layout;
 	int				new_chunk_sectors;
+	int				reshape_backwards;
 
 	atomic_t			plug_cnt;	/* If device is expecting
 							 * more bios soon.
@@ -390,10 +393,13 @@ struct mddev {
 							 * For external metadata, offset
 							 * from start of device.
 							 */
+		unsigned long		space; /* space available at this offset */
 		loff_t			default_offset; /* this is the offset to use when
 							 * hot-adding a bitmap.  It should
 							 * eventually be settable by sysfs.
 							 */
+		unsigned long		default_space; /* space available at
+							* default offset */
 		struct mutex		mutex;
 		unsigned long		chunksize;
 		unsigned long		daemon_sleep; /* how many jiffies between updates? */
@@ -591,6 +597,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
 extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
+extern void md_finish_reshape(struct mddev *mddev);
 
 extern int mddev_congested(struct mddev *mddev, int bits);
 extern void md_flush_request(struct mddev *mddev, struct bio *bio);
@@ -615,6 +622,7 @@ extern int md_run(struct mddev *mddev);
 extern void md_stop(struct mddev *mddev);
 extern void md_stop_writes(struct mddev *mddev);
 extern int md_rdev_init(struct md_rdev *rdev);
+extern void md_rdev_clear(struct md_rdev *rdev);
 
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
@ -1859,7 +1859,9 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (rdev &&
|
||||
test_bit(In_sync, &rdev->flags) &&
|
||||
(test_bit(In_sync, &rdev->flags) ||
|
||||
(!test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->recovery_offset >= sect + s)) &&
|
||||
is_badblock(rdev, sect, s,
|
||||
&first_bad, &bad_sectors) == 0 &&
|
||||
sync_page_io(rdev, sect, s<<9,
|
||||
@ -2024,7 +2026,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
|
||||
continue;
|
||||
if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
|
||||
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
|
||||
rdev_clear_badblocks(rdev, r1_bio->sector, s);
|
||||
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
|
||||
}
|
||||
if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
|
||||
test_bit(R1BIO_WriteError, &r1_bio->state)) {
|
||||
@ -2044,7 +2046,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
struct md_rdev *rdev = conf->mirrors[m].rdev;
|
||||
rdev_clear_badblocks(rdev,
|
||||
r1_bio->sector,
|
||||
r1_bio->sectors);
|
||||
r1_bio->sectors, 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
} else if (r1_bio->bios[m] != NULL) {
|
||||
/* This drive got a write error. We need to
|
||||
@ -2598,7 +2600,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
if (!disk->rdev ||
|
||||
!test_bit(In_sync, &disk->rdev->flags)) {
|
||||
disk->head_position = 0;
|
||||
if (disk->rdev)
|
||||
if (disk->rdev &&
|
||||
(disk->rdev->saved_raid_disk < 0))
|
||||
conf->fullsync = 1;
|
||||
} else if (conf->last_used < 0)
|
||||
/*
|
||||
@@ -2750,9 +2753,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
 	 * any io in the removed space completes, but it hardly seems
 	 * worth it.
 	 */
-	md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
-	if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
+	sector_t newsize = raid1_size(mddev, sectors, 0);
+	if (mddev->external_size &&
+	    mddev->array_sectors > newsize)
 		return -EINVAL;
+	if (mddev->bitmap) {
+		int ret = bitmap_resize(mddev->bitmap, newsize, 0, 0);
+		if (ret)
+			return ret;
+	}
+	md_set_array_sectors(mddev, newsize);
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
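The rewritten raid1_resize() is deliberately ordered: it refuses an externally-managed shrink, resizes the bitmap first, and only publishes the new size once bitmap_resize() has succeeded, so a failure leaves the array at its old, still-consistent size. A condensed sketch of that contract (hypothetical wrapper name, not from the patch):

    static int resize_sketch(struct mddev *mddev, sector_t newsize)
    {
            if (mddev->external_size && mddev->array_sectors > newsize)
                    return -EINVAL;         /* size is managed externally */
            if (mddev->bitmap) {
                    int ret = bitmap_resize(mddev->bitmap, newsize, 0, 0);
                    if (ret)
                            return ret;     /* old size still in effect */
            }
            md_set_array_sectors(mddev, newsize);   /* publish only on success */
            return 0;
    }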
drivers/md/raid10.c: diff suppressed because it is too large (1281 changed lines).
@@ -14,32 +14,38 @@ struct mirror_info {
 struct r10conf {
 	struct mddev		*mddev;
 	struct mirror_info	*mirrors;
-	int			raid_disks;
+	struct mirror_info	*mirrors_new, *mirrors_old;
 	spinlock_t		device_lock;

 	/* geometry */
-	int			near_copies;  /* number of copies laid out
+	struct geom {
+		int		raid_disks;
+		int		near_copies;  /* number of copies laid out
 					       * raid0 style */
-	int			far_copies;   /* number of copies laid out
+		int		far_copies;   /* number of copies laid out
 					       * at large strides across drives
 					       */
-	int			far_offset;   /* far_copies are offset by 1
+		int		far_offset;   /* far_copies are offset by 1
 					       * stripe instead of many
 					       */
-	int			copies;       /* near_copies * far_copies.
-					       * must be <= raid_disks
-					       */
-	sector_t		stride;       /* distance between far copies.
+		sector_t	stride;       /* distance between far copies.
 					       * This is size / far_copies unless
 					       * far_offset, in which case it is
 					       * 1 stripe.
 					       */
+		int		chunk_shift;  /* shift from chunks to sectors */
+		sector_t	chunk_mask;
+	} prev, geo;
+	int			copies;       /* near_copies * far_copies.
+					       * must be <= raid_disks
+					       */

 	sector_t		dev_sectors;  /* temp copy of
 					       * mddev->dev_sectors */
-	int			chunk_shift;  /* shift from chunks to sectors */
-	sector_t		chunk_mask;
+	sector_t		reshape_progress;
+	sector_t		reshape_safe;
+	unsigned long		reshape_checkpoint;
+	sector_t		offset_diff;

 	struct list_head	retry_list;
 	/* queue pending writes and submit them on unplug */
@@ -136,6 +142,7 @@ enum r10bio_state {
 	R10BIO_Uptodate,
 	R10BIO_IsSync,
 	R10BIO_IsRecover,
+	R10BIO_IsReshape,
 	R10BIO_Degraded,
 	/* Set ReadError on bios that experience a read error
 	 * so that raid10d knows what to do with them.
@@ -146,5 +153,10 @@ enum r10bio_state {
 	 */
 	R10BIO_MadeGood,
 	R10BIO_WriteError,
+	/* During a reshape we might be performing IO on the
+	 * 'previous' part of the array, in which case this
+	 * flag is set
+	 */
+	R10BIO_Previous,
 };
 #endif
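Keeping two struct geom instances ('prev' and 'geo') lets a stripe be addressed under either layout while a reshape is in flight; the R10BIO_Previous flag added above records which one applies to a given bio. A hypothetical helper illustrating the selection (not from the patch):

    static struct geom *r10bio_geom(struct r10conf *conf, struct r10bio *r10_bio)
    {
            return test_bit(R10BIO_Previous, &r10_bio->state)
                    ? &conf->prev : &conf->geo;
    }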
@@ -488,6 +488,27 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	return sh;
 }

+/* Determine if 'data_offset' or 'new_data_offset' should be used
+ * in this stripe_head.
+ */
+static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
+{
+	sector_t progress = conf->reshape_progress;
+	/* Need a memory barrier to make sure we see the value
+	 * of conf->generation, or ->data_offset that was set before
+	 * reshape_progress was updated.
+	 */
+	smp_rmb();
+	if (progress == MaxSector)
+		return 0;
+	if (sh->generation == conf->generation - 1)
+		return 0;
+	/* We are in a reshape, and this is a new-generation stripe,
+	 * so use new_data_offset.
+	 */
+	return 1;
+}
+
 static void
 raid5_end_read_request(struct bio *bi, int error);
 static void
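The smp_rmb() in use_new_offset() pairs with write-side barriers added later in this series (smp_mb() in raid5_start_reshape(), smp_wmb() in end_reshape() and in the start_reshape error path). A minimal sketch of the publish/consume pattern, with hypothetical names standing in for generation/data_offset and reshape_progress:

    static int geometry;                    /* stands in for generation/data_offset */
    static sector_t progress = MaxSector;   /* stands in for reshape_progress */

    static void publish_reshape(int new_geometry)
    {
            geometry = new_geometry;
            smp_wmb();              /* order geometry before progress */
            progress = 0;           /* readers may now observe the reshape */
    }

    static int read_geometry(void)
    {
            sector_t p = progress;
            smp_rmb();              /* pairs with smp_wmb() above */
            if (p == MaxSector)
                    return 0;       /* no reshape in progress */
            return geometry;        /* sees the value published before 'progress' */
    }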
@@ -518,6 +539,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			replace_only = 1;
 		} else
 			continue;
+		if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
+			rw |= REQ_SYNC;

 		bi = &sh->dev[i].req;
 		rbi = &sh->dev[i].rreq; /* For writing to replacement */
@@ -603,7 +626,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				__func__, (unsigned long long)sh->sector,
 				bi->bi_rw, i);
 			atomic_inc(&sh->count);
-			bi->bi_sector = sh->sector + rdev->data_offset;
+			if (use_new_offset(conf, sh))
+				bi->bi_sector = (sh->sector
+						 + rdev->new_data_offset);
+			else
+				bi->bi_sector = (sh->sector
+						 + rdev->data_offset);
 			bi->bi_flags = 1 << BIO_UPTODATE;
 			bi->bi_idx = 0;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -627,7 +655,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				__func__, (unsigned long long)sh->sector,
 				rbi->bi_rw, i);
 			atomic_inc(&sh->count);
-			rbi->bi_sector = sh->sector + rrdev->data_offset;
+			if (use_new_offset(conf, sh))
+				rbi->bi_sector = (sh->sector
+						  + rrdev->new_data_offset);
+			else
+				rbi->bi_sector = (sh->sector
+						  + rrdev->data_offset);
 			rbi->bi_flags = 1 << BIO_UPTODATE;
 			rbi->bi_idx = 0;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1114,6 +1147,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			    dev->sector + STRIPE_SECTORS) {
 				if (wbi->bi_rw & REQ_FUA)
 					set_bit(R5_WantFUA, &dev->flags);
+				if (wbi->bi_rw & REQ_SYNC)
+					set_bit(R5_SyncIO, &dev->flags);
 				tx = async_copy_data(1, wbi, dev->page,
 						     dev->sector, tx);
 				wbi = r5_next_bio(wbi, dev->sector);
@@ -1131,13 +1166,15 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
 	int i;
-	bool fua = false;
+	bool fua = false, sync = false;

 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);

-	for (i = disks; i--; )
+	for (i = disks; i--; ) {
 		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
+		sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
+	}

 	for (i = disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
@@ -1146,6 +1183,8 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 			set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
+			if (sync)
+				set_bit(R5_SyncIO, &dev->flags);
 		}
 	}
@@ -1648,7 +1687,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 	char b[BDEVNAME_SIZE];
 	struct md_rdev *rdev = NULL;
+	sector_t s;

 	for (i=0 ; i<disks; i++)
 		if (bi == &sh->dev[i].req)
@@ -1671,6 +1710,10 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	if (!rdev)
 		rdev = conf->disks[i].rdev;

+	if (use_new_offset(conf, sh))
+		s = sh->sector + rdev->new_data_offset;
+	else
+		s = sh->sector + rdev->data_offset;
 	if (uptodate) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
@@ -1683,8 +1726,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error corrected"
 				" (%lu sectors at %llu on %s)\n",
 				mdname(conf->mddev), STRIPE_SECTORS,
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdevname(rdev->bdev, b));
 			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
@@ -1704,8 +1746,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error on replacement device "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (conf->mddev->degraded >= conf->max_degraded)
 			printk_ratelimited(
@@ -1713,8 +1754,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error not correctable "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
@@ -1723,8 +1763,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				"md/raid:%s: read error NOT corrected!! "
 				"(sector %llu on %s).\n",
 				mdname(conf->mddev),
-				(unsigned long long)(sh->sector
-						     + rdev->data_offset),
+				(unsigned long long)s,
 				bdn);
 		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
@@ -3561,7 +3600,7 @@ finish:
 			if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
 				rdev = conf->disks[i].rdev;
 				rdev_clear_badblocks(rdev, sh->sector,
-						     STRIPE_SECTORS);
+						     STRIPE_SECTORS, 0);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 			if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
@@ -3570,7 +3609,7 @@ finish:
 				/* rdev have been moved down */
 				rdev = conf->disks[i].rdev;
 				rdev_clear_badblocks(rdev, sh->sector,
-						     STRIPE_SECTORS);
+						     STRIPE_SECTORS, 0);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 		}
@@ -3842,6 +3881,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev = rdev->bdev;
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
+		/* No reshape active, so we can trust rdev->data_offset */
 		align_bi->bi_sector += rdev->data_offset;

 		if (!bio_fits_rdev(align_bi) ||
@@ -3953,12 +3993,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	plugged = mddev_check_plugged(mddev);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
-		int disks, data_disks;
+		int disks;
 		int previous;

 	retry:
 		previous = 0;
 		disks = conf->raid_disks;
 		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		if (unlikely(conf->reshape_progress != MaxSector)) {
 			/* spinlock is needed as reshape_progress may be
@@ -3970,13 +4008,12 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			 * to check again.
 			 */
 			spin_lock_irq(&conf->device_lock);
-			if (mddev->delta_disks < 0
+			if (mddev->reshape_backwards
 			    ? logical_sector < conf->reshape_progress
 			    : logical_sector >= conf->reshape_progress) {
 				disks = conf->previous_raid_disks;
 				previous = 1;
 			} else {
-				if (mddev->delta_disks < 0
+				if (mddev->reshape_backwards
 				    ? logical_sector < conf->reshape_safe
 				    : logical_sector >= conf->reshape_safe) {
 					spin_unlock_irq(&conf->device_lock);
@@ -3986,7 +4023,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			}
 			spin_unlock_irq(&conf->device_lock);
 		}
-		data_disks = disks - conf->max_degraded;

 		new_sector = raid5_compute_sector(conf, logical_sector,
 						  previous,
@@ -4009,7 +4045,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			 */
 			int must_retry = 0;
 			spin_lock_irq(&conf->device_lock);
-			if (mddev->delta_disks < 0
+			if (mddev->reshape_backwards
 			    ? logical_sector >= conf->reshape_progress
 			    : logical_sector < conf->reshape_progress)
 				/* mismatch, need to try again */
@@ -4108,11 +4144,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk

 	if (sector_nr == 0) {
 		/* If restarting in the middle, skip the initial sectors */
-		if (mddev->delta_disks < 0 &&
+		if (mddev->reshape_backwards &&
 		    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
 			sector_nr = raid5_size(mddev, 0, 0)
 				- conf->reshape_progress;
-		} else if (mddev->delta_disks >= 0 &&
+		} else if (!mddev->reshape_backwards &&
 			   conf->reshape_progress > 0)
 			sector_nr = conf->reshape_progress;
 		sector_div(sector_nr, new_data_disks);
@@ -4133,13 +4169,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	else
 		reshape_sectors = mddev->chunk_sectors;

-	/* we update the metadata when there is more than 3Meg
-	 * in the block range (that is rather arbitrary, should
-	 * probably be time based) or when the data about to be
-	 * copied would over-write the source of the data at
-	 * the front of the range.
-	 * i.e. one new_stripe along from reshape_progress new_maps
-	 * to after where reshape_safe old_maps to
+	/* We update the metadata at least every 10 seconds, or when
+	 * the data about to be copied would over-write the source of
+	 * the data at the front of the range.  i.e. one new_stripe
+	 * along from reshape_progress new_maps to after where
+	 * reshape_safe old_maps to
 	 */
 	writepos = conf->reshape_progress;
 	sector_div(writepos, new_data_disks);
@@ -4147,7 +4181,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	sector_div(readpos, data_disks);
 	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
-	if (mddev->delta_disks < 0) {
+	if (mddev->reshape_backwards) {
 		writepos -= min_t(sector_t, reshape_sectors, writepos);
 		readpos += reshape_sectors;
 		safepos += reshape_sectors;
@@ -4157,11 +4191,29 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 		safepos -= min_t(sector_t, reshape_sectors, safepos);
 	}

+	/* Having calculated the 'writepos' possibly use it
+	 * to set 'stripe_addr' which is where we will write to.
+	 */
+	if (mddev->reshape_backwards) {
+		BUG_ON(conf->reshape_progress == 0);
+		stripe_addr = writepos;
+		BUG_ON((mddev->dev_sectors &
+			~((sector_t)reshape_sectors - 1))
+		       - reshape_sectors - stripe_addr
+		       != sector_nr);
+	} else {
+		BUG_ON(writepos != sector_nr + reshape_sectors);
+		stripe_addr = sector_nr;
+	}
+
 	/* 'writepos' is the most advanced device address we might write.
 	 * 'readpos' is the least advanced device address we might read.
 	 * 'safepos' is the least address recorded in the metadata as having
 	 * been reshaped.
-	 * If 'readpos' is behind 'writepos', then there is no way that we can
+	 * If there is a min_offset_diff, these are adjusted either by
+	 * increasing the safepos/readpos if diff is negative, or
+	 * increasing writepos if diff is positive.
+	 * If 'readpos' is then behind 'writepos', there is no way that we can
 	 * ensure safety in the face of a crash - that must be done by userspace
 	 * making a backup of the data.  So in that case there is no particular
 	 * rush to update metadata.
@@ -4174,7 +4226,13 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	 * Maybe that number should be configurable, but I'm not sure it is
 	 * worth it.... maybe it could be a multiple of safemode_delay???
 	 */
-	if ((mddev->delta_disks < 0
+	if (conf->min_offset_diff < 0) {
+		safepos += -conf->min_offset_diff;
+		readpos += -conf->min_offset_diff;
+	} else
+		writepos += conf->min_offset_diff;
+
+	if ((mddev->reshape_backwards
 	     ? (safepos > writepos && readpos < writepos)
 	     : (safepos < writepos && readpos > writepos)) ||
 	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
@@ -4195,17 +4253,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	}

-	if (mddev->delta_disks < 0) {
-		BUG_ON(conf->reshape_progress == 0);
-		stripe_addr = writepos;
-		BUG_ON((mddev->dev_sectors &
-			~((sector_t)reshape_sectors - 1))
-		       - reshape_sectors - stripe_addr
-		       != sector_nr);
-	} else {
-		BUG_ON(writepos != sector_nr + reshape_sectors);
-		stripe_addr = sector_nr;
-	}
 	INIT_LIST_HEAD(&stripes);
 	for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
 		int j;
@@ -4239,7 +4286,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 		list_add(&sh->lru, &stripes);
 	}
 	spin_lock_irq(&conf->device_lock);
-	if (mddev->delta_disks < 0)
+	if (mddev->reshape_backwards)
 		conf->reshape_progress -= reshape_sectors * new_data_disks;
 	else
 		conf->reshape_progress += reshape_sectors * new_data_disks;
@@ -4952,16 +4999,42 @@ static int run(struct mddev *mddev)
 	struct md_rdev *rdev;
 	sector_t reshape_offset = 0;
 	int i;
+	long long min_offset_diff = 0;
+	int first = 1;

 	if (mddev->recovery_cp != MaxSector)
 		printk(KERN_NOTICE "md/raid:%s: not clean"
 		       " -- starting background reconstruction\n",
 		       mdname(mddev));

+	rdev_for_each(rdev, mddev) {
+		long long diff;
+		if (rdev->raid_disk < 0)
+			continue;
+		diff = (rdev->new_data_offset - rdev->data_offset);
+		if (first) {
+			min_offset_diff = diff;
+			first = 0;
+		} else if (mddev->reshape_backwards &&
+			   diff < min_offset_diff)
+			min_offset_diff = diff;
+		else if (!mddev->reshape_backwards &&
+			 diff > min_offset_diff)
+			min_offset_diff = diff;
+	}
+
 	if (mddev->reshape_position != MaxSector) {
 		/* Check that we can continue the reshape.
-		 * Currently only disks can change, it must
-		 * increase, and we must be past the point where
-		 * a stripe over-writes itself
+		 * Difficulties arise if the stripe we would write to
+		 * next is at or after the stripe we would read from next.
+		 * For a reshape that changes the number of devices, this
+		 * is only possible for a very short time, and mdadm makes
+		 * sure that time appears to have passed before assembling
+		 * the array.  So we fail if that time hasn't passed.
+		 * For a reshape that keeps the number of devices the same
+		 * mdadm must be monitoring the reshape and keeping the
+		 * critical areas read-only and backed up.  It will start
+		 * the array in read-only mode, so we check for that.
+		 */
 		sector_t here_new, here_old;
 		int old_disks;
@@ -4993,26 +5066,34 @@ static int run(struct mddev *mddev)
 		/* here_old is the first stripe that we might need to read
 		 * from */
 		if (mddev->delta_disks == 0) {
+			if ((here_new * mddev->new_chunk_sectors !=
+			     here_old * mddev->chunk_sectors)) {
+				printk(KERN_ERR "md/raid:%s: reshape position is"
+				       " confused - aborting\n", mdname(mddev));
+				return -EINVAL;
+			}
 			/* We cannot be sure it is safe to start an in-place
-			 * reshape.  It is only safe if user-space if monitoring
+			 * reshape.  It is only safe if user-space is monitoring
 			 * and taking constant backups.
 			 * mdadm always starts a situation like this in
 			 * readonly mode so it can take control before
 			 * allowing any writes.  So just check for that.
 			 */
-			if ((here_new * mddev->new_chunk_sectors !=
-			     here_old * mddev->chunk_sectors) ||
-			    mddev->ro == 0) {
-				printk(KERN_ERR "md/raid:%s: in-place reshape must be started"
-				       " in read-only mode - aborting\n",
+			if (abs(min_offset_diff) >= mddev->chunk_sectors &&
+			    abs(min_offset_diff) >= mddev->new_chunk_sectors)
+				/* not really in-place - so OK */;
+			else if (mddev->ro == 0) {
+				printk(KERN_ERR "md/raid:%s: in-place reshape "
+				       "must be started in read-only mode "
+				       "- aborting\n",
 				       mdname(mddev));
 				return -EINVAL;
 			}
-		} else if (mddev->delta_disks < 0
-			   ? (here_new * mddev->new_chunk_sectors <=
+		} else if (mddev->reshape_backwards
+			   ? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
 			      here_old * mddev->chunk_sectors)
 			   : (here_new * mddev->new_chunk_sectors >=
-			      here_old * mddev->chunk_sectors)) {
+			      here_old * mddev->chunk_sectors + (-min_offset_diff))) {
 			/* Reading from the same stripe as writing to - bad */
 			printk(KERN_ERR "md/raid:%s: reshape_position too early for "
 			       "auto-recovery - aborting.\n",
@@ -5037,6 +5118,7 @@ static int run(struct mddev *mddev)
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);

+	conf->min_offset_diff = min_offset_diff;
 	mddev->thread = conf->thread;
 	conf->thread = NULL;
 	mddev->private = conf;
@@ -5182,9 +5264,12 @@ static int run(struct mddev *mddev)
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));

-		rdev_for_each(rdev, mddev)
+		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->new_data_offset << 9);
+		}
 	}

 	return 0;
@@ -5418,12 +5503,18 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
 	 * any io in the removed space completes, but it hardly seems
 	 * worth it.
 	 */
+	sector_t newsize;
 	sectors &= ~((sector_t)mddev->chunk_sectors - 1);
-	md_set_array_sectors(mddev, raid5_size(mddev, sectors,
-					       mddev->raid_disks));
-	if (mddev->array_sectors >
-	    raid5_size(mddev, sectors, mddev->raid_disks))
+	newsize = raid5_size(mddev, sectors, mddev->raid_disks);
+	if (mddev->external_size &&
+	    mddev->array_sectors > newsize)
 		return -EINVAL;
+	if (mddev->bitmap) {
+		int ret = bitmap_resize(mddev->bitmap, sectors, 0, 0);
+		if (ret)
+			return ret;
+	}
+	md_set_array_sectors(mddev, newsize);
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
@@ -5468,9 +5559,6 @@ static int check_reshape(struct mddev *mddev)
 	    mddev->new_layout == mddev->layout &&
 	    mddev->new_chunk_sectors == mddev->chunk_sectors)
 		return 0; /* nothing to do */
-	if (mddev->bitmap)
-		/* Cannot grow a bitmap yet */
-		return -EBUSY;
 	if (has_failed(conf))
 		return -EINVAL;
 	if (mddev->delta_disks < 0) {
@@ -5505,10 +5593,14 @@ static int raid5_start_reshape(struct mddev *mddev)
 	if (!check_stripe_cache(mddev))
 		return -ENOSPC;

-	rdev_for_each(rdev, mddev)
+	if (has_failed(conf))
+		return -EINVAL;
+
+	rdev_for_each(rdev, mddev) {
 		if (!test_bit(In_sync, &rdev->flags)
 		    && !test_bit(Faulty, &rdev->flags))
 			spares++;
+	}

 	if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
 		/* Not enough devices even to make a degraded array
@@ -5535,12 +5627,16 @@ static int raid5_start_reshape(struct mddev *mddev)
 	conf->chunk_sectors = mddev->new_chunk_sectors;
 	conf->prev_algo = conf->algorithm;
 	conf->algorithm = mddev->new_layout;
-	if (mddev->delta_disks < 0)
+	conf->generation++;
+	/* Code that selects data_offset needs to see the generation update
+	 * if reshape_progress has been set - so a memory barrier needed.
+	 */
+	smp_mb();
+	if (mddev->reshape_backwards)
 		conf->reshape_progress = raid5_size(mddev, 0, 0);
 	else
 		conf->reshape_progress = 0;
 	conf->reshape_safe = conf->reshape_progress;
-	conf->generation++;
 	spin_unlock_irq(&conf->device_lock);

 	/* Add some new drives, as many as will fit.
@@ -5592,6 +5688,9 @@ static int raid5_start_reshape(struct mddev *mddev)
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+		rdev_for_each(rdev, mddev)
+			rdev->new_data_offset = rdev->data_offset;
+		smp_wmb();
 		conf->reshape_progress = MaxSector;
 		mddev->reshape_position = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
@@ -5610,9 +5709,13 @@ static void end_reshape(struct r5conf *conf)
 {

 	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
+		struct md_rdev *rdev;

 		spin_lock_irq(&conf->device_lock);
 		conf->previous_raid_disks = conf->raid_disks;
+		rdev_for_each(rdev, conf->mddev)
+			rdev->data_offset = rdev->new_data_offset;
+		smp_wmb();
 		conf->reshape_progress = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
@@ -5652,17 +5755,18 @@ static void raid5_finish_reshape(struct mddev *mddev)
 			     d < conf->raid_disks - mddev->delta_disks;
 			     d++) {
 				struct md_rdev *rdev = conf->disks[d].rdev;
-				if (rdev &&
-				    raid5_remove_disk(mddev, rdev) == 0) {
-					sysfs_unlink_rdev(mddev, rdev);
-					rdev->raid_disk = -1;
-				}
+				if (rdev)
+					clear_bit(In_sync, &rdev->flags);
+				rdev = conf->disks[d].replacement;
+				if (rdev)
+					clear_bit(In_sync, &rdev->flags);
 			}
 		}
 		mddev->layout = conf->algorithm;
 		mddev->chunk_sectors = conf->chunk_sectors;
 		mddev->reshape_position = MaxSector;
 		mddev->delta_disks = 0;
+		mddev->reshape_backwards = 0;
 	}
 }
@@ -285,6 +285,7 @@ enum r5dev_flags {
 	 */
 	R5_Wantdrain,	/* dev->towrite needs to be drained */
 	R5_WantFUA,	/* Write should be FUA */
+	R5_SyncIO,	/* The IO is sync */
 	R5_WriteError,	/* got a write error - need to record it */
 	R5_MadeGood,	/* A bad block has been fixed by writing to it */
 	R5_ReadRepl,	/* Will/did read from replacement rather than orig */
@@ -385,6 +386,12 @@ struct r5conf {
 	short			generation; /* increments with every reshape */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
+	long long		min_offset_diff; /* minimum difference between
+						  * data_offset and
+						  * new_data_offset across all
+						  * devices.  May be negative,
+						  * but is closest to zero.
+						  */

 	struct list_head	handle_list; /* stripes needing handling */
 	struct list_head	hold_list; /* preread ready stripes */
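A worked example of how min_offset_diff ends up "closest to zero" (inferred from the run() loop earlier in this diff; the mdadm side is not shown here): in a forward reshape each device's new_data_offset sits at or before its data_offset, so the per-device diffs are non-positive - say -2048 sectors on one device and -1024 on another. The selection loop keeps -1024, the bound every device can honour. reshape_request() then pads safepos/readpos by that magnitude when the value is negative, or writepos when it is positive, before deciding whether a metadata checkpoint is due.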
@@ -233,7 +233,10 @@ struct mdp_superblock_1 {
 	__le32	delta_disks;	/* change in number of raid_disks */
 	__le32	new_layout;	/* new layout */
 	__le32	new_chunk;	/* new chunk size (512byte sectors) */
-	__u8	pad1[128-124];	/* set to 0 when written */
+	__le32	new_offset;	/* signed number to add to data_offset in new
+				 * layout.  0 == no-change.  This can be
+				 * different on each device in the array.
+				 */

 	/* constant this-device information - 64 bytes */
 	__le64	data_offset;	/* sector start of data, often 0 */
@@ -281,10 +284,18 @@ struct mdp_superblock_1 {
 					   * active device with same 'role'.
 					   * 'recovery_offset' is also set.
 					   */
+#define	MD_FEATURE_RESHAPE_BACKWARDS	32 /* Reshape doesn't change number
+					    * of devices, but is going
+					    * backwards anyway.
+					    */
+#define	MD_FEATURE_NEW_OFFSET		64 /* new_offset must be honoured */
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
 					|MD_FEATURE_BAD_BLOCKS		\
-					|MD_FEATURE_REPLACEMENT)
+					|MD_FEATURE_REPLACEMENT		\
+					|MD_FEATURE_RESHAPE_BACKWARDS	\
+					|MD_FEATURE_NEW_OFFSET		\
+					)

 #endif
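MD_FEATURE_ALL is the mask of feature bits this kernel understands; extending it with the two new flags is what lets a 3.5 kernel accept metadata that uses them, while older kernels still see them as unknown. A sketch of the usual gate (hypothetical helper - where md actually performs this check is not shown in this diff):

    static int sb_features_ok(const struct mdp_superblock_1 *sb)
    {
            /* any feature bit outside MD_FEATURE_ALL is unknown: reject */
            return (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) == 0;
    }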
@@ -99,8 +99,20 @@ extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;

+struct raid6_recov_calls {
+	void (*data2)(int, size_t, int, int, void **);
+	void (*datap)(int, size_t, int, void **);
+	int  (*valid)(void);
+	const char *name;
+	int priority;
+};
+
+extern const struct raid6_recov_calls raid6_recov_intx1;
+extern const struct raid6_recov_calls raid6_recov_ssse3;
+
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
+extern const struct raid6_recov_calls *const raid6_recov_algos[];
 int raid6_select_algo(void);

 /* Return values from chk_syndrome */
@@ -111,14 +123,16 @@ int raid6_select_algo(void);

 /* Galois field tables */
 extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
+extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
 extern const u8 raid6_gfexp[256] __attribute__((aligned(256)));
 extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));

 /* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs);
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
+extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila,
+				 void **ptrs);
 void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 		      void **ptrs);
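Turning raid6_2data_recov and raid6_datap_recov into function pointers keeps every caller source-compatible while letting raid6_select_algo() bind the fastest implementation at boot. A sketch of a call site under that assumption (hypothetical wrapper, not from the patch):

    /* ptrs[] holds the disks-2 data pages followed by P and Q;
     * the pointers were filled in by raid6_select_algo().
     */
    static void recover_sketch(int disks, size_t bytes, void **ptrs)
    {
            /* data blocks 0 and 1 lost: */
            raid6_2data_recov(disks, bytes, 0, 1, ptrs);
            /* data block 0 plus P lost: */
            raid6_datap_recov(disks, bytes, 0, ptrs);
    }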
@@ -1,6 +1,6 @@
 obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o

-raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
+raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
		   altivec8.o mmx.o sse1.o sse2.o
 hostprogs-y	+= mktables
@@ -17,11 +17,11 @@
  */

 #include <linux/raid/pq.h>
-#include <linux/module.h>
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
 #else
+#include <linux/module.h>
 #include <linux/gfp.h>
 #if !RAID6_USE_EMPTY_ZERO_PAGE
 /* In .bss so it's zeroed */
@@ -34,10 +34,6 @@ struct raid6_calls raid6_call;
 EXPORT_SYMBOL_GPL(raid6_call);

 const struct raid6_calls * const raid6_algos[] = {
-	&raid6_intx1,
-	&raid6_intx2,
-	&raid6_intx4,
-	&raid6_intx8,
 #if defined(__ia64__)
 	&raid6_intx16,
 	&raid6_intx32,
@@ -61,6 +57,24 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+	&raid6_intx1,
+	&raid6_intx2,
+	&raid6_intx4,
+	&raid6_intx8,
 	NULL
 };

+void (*raid6_2data_recov)(int, size_t, int, int, void **);
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
+
+void (*raid6_datap_recov)(int, size_t, int, void **);
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+const struct raid6_recov_calls *const raid6_recov_algos[] = {
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+	&raid6_recov_ssse3,
+#endif
+	&raid6_recov_intx1,
+	NULL
+};
@@ -72,59 +86,55 @@ const struct raid6_calls * const raid6_algos[] = {
 #define time_before(x, y) ((x) < (y))
 #endif

-/* Try to pick the best algorithm */
-/* This code uses the gfmul table as convenient data set to abuse */
-
-int __init raid6_select_algo(void)
+static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 {
-	const struct raid6_calls * const * algo;
-	const struct raid6_calls * best;
-	char *syndromes;
-	void *dptrs[(65536/PAGE_SIZE)+2];
-	int i, disks;
-	unsigned long perf, bestperf;
-	int bestprefer;
-	unsigned long j0, j1;
+	const struct raid6_recov_calls *const *algo;
+	const struct raid6_recov_calls *best;

-	disks = (65536/PAGE_SIZE)+2;
-	for ( i = 0 ; i < disks-2 ; i++ ) {
-		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
-	}
+	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
+		if (!best || (*algo)->priority > best->priority)
+			if (!(*algo)->valid || (*algo)->valid())
+				best = *algo;

-	/* Normal code - use a 2-page allocation to avoid D$ conflict */
-	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+	if (best) {
+		raid6_2data_recov = best->data2;
+		raid6_datap_recov = best->datap;

-	if ( !syndromes ) {
-		printk("raid6: Yikes! No memory available.\n");
-		return -ENOMEM;
-	}
+		printk("raid6: using %s recovery algorithm\n", best->name);
+	} else
+		printk("raid6: Yikes! No recovery algorithm found!\n");

-	dptrs[disks-2] = syndromes;
-	dptrs[disks-1] = syndromes + PAGE_SIZE;
+	return best;
+}

-	bestperf = 0; bestprefer = 0; best = NULL;
+static inline const struct raid6_calls *raid6_choose_gen(
+	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
+{
+	unsigned long perf, bestperf, j0, j1;
+	const struct raid6_calls *const *algo;
+	const struct raid6_calls *best;

+	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+		if (!best || (*algo)->prefer >= best->prefer) {
+			if ((*algo)->valid && !(*algo)->valid())
+				continue;

-	for ( algo = raid6_algos ; *algo ; algo++ ) {
-		if ( !(*algo)->valid || (*algo)->valid() ) {
 			perf = 0;

 			preempt_disable();
 			j0 = jiffies;
-			while ( (j1 = jiffies) == j0 )
+			while ((j1 = jiffies) == j0)
 				cpu_relax();
 			while (time_before(jiffies,
 					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
-				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
 				perf++;
 			}
 			preempt_enable();

-			if ( (*algo)->prefer > bestprefer ||
-			     ((*algo)->prefer == bestprefer &&
-			      perf > bestperf) ) {
-				best = *algo;
-				bestprefer = best->prefer;
+			if (perf > bestperf) {
 				bestperf = perf;
+				best = *algo;
 			}
 			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
 			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
@@ -139,9 +149,46 @@ int __init raid6_select_algo(void)
 	} else
 		printk("raid6: Yikes! No algorithm found!\n");

+	return best;
+}
+
+/* Try to pick the best algorithm */
+/* This code uses the gfmul table as convenient data set to abuse */
+
+int __init raid6_select_algo(void)
+{
+	const int disks = (65536/PAGE_SIZE)+2;
+
+	const struct raid6_calls *gen_best;
+	const struct raid6_recov_calls *rec_best;
+	char *syndromes;
+	void *dptrs[(65536/PAGE_SIZE)+2];
+	int i;
+
+	for (i = 0; i < disks-2; i++)
+		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+
+	/* Normal code - use a 2-page allocation to avoid D$ conflict */
+	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+
+	if (!syndromes) {
+		printk("raid6: Yikes! No memory available.\n");
+		return -ENOMEM;
+	}
+
+	dptrs[disks-2] = syndromes;
+	dptrs[disks-1] = syndromes + PAGE_SIZE;
+
+	/* select raid gen_syndrome function */
+	gen_best = raid6_choose_gen(&dptrs, disks);
+
+	/* select raid recover functions */
+	rec_best = raid6_choose_recov();
+
 	free_pages((unsigned long)syndromes, 1);

-	return best ? 0 : -EINVAL;
+	return gen_best && rec_best ? 0 : -EINVAL;
 }

 static void raid6_exit(void)
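The MB/s figure printed by raid6_choose_gen() follows from the fixed benchmark geometry: each gen_syndrome() call walks (65536/PAGE_SIZE) data pages, i.e. 2^16 bytes of data, and the timed loop runs for 2^RAID6_TIME_JIFFIES_LG2 jiffies. Working that through:

    bytes/sec = perf * 2^16 / (2^LG2 / HZ)
    MB/s      = perf * 2^16 * HZ / (2^LG2 * 2^20)
              = (perf * HZ) >> (20 - 16 + RAID6_TIME_JIFFIES_LG2)

which is exactly the shift expression in the printk above.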
@@ -81,6 +81,31 @@ int main(int argc, char *argv[])
 	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
 	printf("#endif\n");

+	/* Compute vector multiplication table */
+	printf("\nconst u8 __attribute__((aligned(256)))\n"
+		"raid6_vgfmul[256][32] =\n"
+		"{\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t{\n");
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, j + k),
+				       (k == 7) ? '\n' : ' ');
+		}
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, (j + k) << 4),
+				       (k == 7) ? '\n' : ' ');
+		}
+		printf("\t},\n");
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_vgfmul);\n");
+	printf("#endif\n");
+
 	/* Compute power-of-2 table (exponent) */
 	v = 1;
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
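Each raid6_vgfmul[c] row holds two 16-byte tables: products of c with the sixteen low-nibble values, then with the sixteen high-nibble values (0x00, 0x10, ..., 0xf0). Because GF(256) multiplication distributes over XOR, c*x = c*(x & 0x0f) ^ c*(x & 0xf0), so one multiply costs two table lookups - exactly what the pshufb pairs in the new recov_ssse3.c compute for 16 bytes at a time. A scalar sketch of the identity (illustrative only, not part of the patch):

    #include <linux/raid/pq.h>

    /* Multiply one byte x by the constant c via the split tables. */
    static u8 vgfmul_one(u8 c, u8 x)
    {
            const u8 *tbl = raid6_vgfmul[c];

            return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
    }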
@@ -22,7 +22,7 @@
 #include <linux/raid/pq.h>

 /* Recover two failed data blocks. */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs)
 {
 	u8 *p, *q, *dp, *dq;
@@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		p++; q++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_2data_recov);

 /* Recover failure of one data block plus the P block */
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs)
 {
 	u8 *p, *q, *dq;
 	const u8 *qmul;		/* Q multiplier table */
@@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
 		q++; dq++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+
+const struct raid6_recov_calls raid6_recov_intx1 = {
+	.data2 = raid6_2data_recov_intx1,
+	.datap = raid6_datap_recov_intx1,
+	.valid = NULL,
+	.name = "intx1",
+	.priority = 0,
+};

 #ifndef __KERNEL__
 /* Testing only */
lib/raid6/recov_ssse3.c - new file, 335 lines:
@@ -0,0 +1,335 @@
/*
 * Copyright (C) 2012 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_ssse3(void)
{
	return boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2) &&
		boot_cpu_has(X86_FEATURE_SSSE3);
}

void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb,
		void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	static const u8 __aligned(16) x0f[16] = {
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));

#ifdef CONFIG_X86_64
	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

	/* Now do it... */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* xmm6, xmm14, xmm15 */

		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
		asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16]));
		asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0]));
		asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16]));

		/* xmm0/8 = px */

		asm volatile("movdqa %xmm6,%xmm4");
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
		asm volatile("movdqa %xmm6,%xmm12");
		asm volatile("movdqa %xmm5,%xmm13");
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("movdqa %xmm9,%xmm11");
		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
		asm volatile("movdqa %xmm8,%xmm10");
		asm volatile("psraw  $4,%xmm1");
		asm volatile("psraw  $4,%xmm9");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm11");
		asm volatile("pand   %xmm7,%xmm1");
		asm volatile("pand   %xmm7,%xmm9");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pshufb %xmm9,%xmm13");
		asm volatile("pxor   %xmm4,%xmm5");
		asm volatile("pxor   %xmm12,%xmm13");

		/* xmm5/13 = qx */

		asm volatile("movdqa %xmm14,%xmm4");
		asm volatile("movdqa %xmm15,%xmm1");
		asm volatile("movdqa %xmm14,%xmm12");
		asm volatile("movdqa %xmm15,%xmm9");
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("movdqa %xmm10,%xmm11");
		asm volatile("psraw  $4,%xmm2");
		asm volatile("psraw  $4,%xmm10");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm11");
		asm volatile("pand   %xmm7,%xmm2");
		asm volatile("pand   %xmm7,%xmm10");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pshufb %xmm10,%xmm9");
		asm volatile("pxor   %xmm4,%xmm1");
		asm volatile("pxor   %xmm12,%xmm9");

		/* xmm1/9 = pbmul[px] */
		asm volatile("pxor   %xmm5,%xmm1");
		asm volatile("pxor   %xmm13,%xmm9");
		/* xmm1/9 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

		asm volatile("pxor   %xmm1,%xmm0");
		asm volatile("pxor   %xmm9,%xmm8");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#else
		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
		asm volatile("pxor   %0,%%xmm1" : : "m" (*dq));
		asm volatile("pxor   %0,%%xmm0" : : "m" (*dp));

		/* 1 = dq ^ q
		 * 0 = dp ^ p
		 */
		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("psraw  $4,%xmm1");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm1");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pxor   %xmm4,%xmm5");

		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */

		/* xmm5 = qx */

		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("psraw  $4,%xmm2");
		asm volatile("pand   %xmm7,%xmm3");
		asm volatile("pand   %xmm7,%xmm2");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pxor   %xmm4,%xmm1");

		/* xmm1 = pbmul[px] */
		asm volatile("pxor   %xmm5,%xmm1");
		/* xmm1 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));

		asm volatile("pxor   %xmm1,%xmm0");
		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}


void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	static const u8 __aligned(16) x0f[16] = {
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

		/* xmm3 = q[0] ^ dq[0] */

		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm4 = q[16] ^ dq[16] */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %xmm4, %xmm8");

		/* xmm4 = xmm8 = q[16] ^ dq[16] */

		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
		asm volatile("pxor %xmm0, %xmm1");
		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

		/* xmm1 = qmul[q[0] ^ dq[0]] */

		asm volatile("psraw $4, %xmm4");
		asm volatile("pand %xmm7, %xmm8");
		asm volatile("pand %xmm7, %xmm4");
		asm volatile("pshufb %xmm8, %xmm10");
		asm volatile("pshufb %xmm4, %xmm11");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("pxor %xmm10, %xmm11");
		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

		/* xmm11 = qmul[q[16] ^ dq[16]] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */

		asm volatile("pxor %xmm11, %xmm12");

		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;

#else
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm3 = *q ^ *dq */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("pxor %xmm0, %xmm1");

		/* xmm1 = qmul[*q ^ *dq] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = *p ^ qmul[*q ^ *dq] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_ssse3 = {
	.data2 = raid6_2data_recov_ssse3,
	.datap = raid6_datap_recov_ssse3,
	.valid = raid6_has_ssse3,
#ifdef CONFIG_X86_64
	.name = "ssse3x2",
#else
	.name = "ssse3x1",
#endif
	.priority = 1,
};

#endif
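Both the SSSE3 routines above and the generic intx1 versions in recov.c implement the same algebra: regenerate the syndrome with the failed blocks replaced by zero pages, giving Pxy and Qxy, then solve Da ^ Db = P ^ Pxy and g^a*Da ^ g^b*Db = Q ^ Qxy over GF(256). A scalar sketch of the per-byte step (illustrative only; the real loops operate on whole pages):

    #include <linux/raid/pq.h>

    /* A = 1/(g^(b-a) + 1)  -> raid6_gfexi[failb-faila]
     * B = 1/(g^a + g^b)    -> raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]
     * Db = A*(P^Pxy) ^ B*(Q^Qxy),  Da = (P^Pxy) ^ Db
     */
    static void recov_one_byte(u8 p, u8 pxy, u8 q, u8 qxy,
                               int faila, int failb, u8 *da, u8 *db)
    {
            const u8 *pbmul = raid6_gfmul[raid6_gfexi[failb - faila]];
            const u8 *qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila] ^
                                                      raid6_gfexp[failb]]];

            *db = pbmul[p ^ pxy] ^ qmul[q ^ qxy];
            *da = (p ^ pxy) ^ *db;
    }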
@@ -23,7 +23,7 @@ RANLIB	= ranlib
 all:	raid6.a raid6test

 raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
-	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
+	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
	 tables.o
	 rm -f $@
	 $(AR) cq $@ $^
@@ -90,25 +90,35 @@ static int test_disks(int i, int j)
 int main(int argc, char *argv[])
 {
 	const struct raid6_calls *const *algo;
+	const struct raid6_recov_calls *const *ra;
 	int i, j;
 	int err = 0;

 	makedata();

-	for (algo = raid6_algos; *algo; algo++) {
-		if (!(*algo)->valid || (*algo)->valid()) {
-			raid6_call = **algo;
+	for (ra = raid6_recov_algos; *ra; ra++) {
+		if ((*ra)->valid && !(*ra)->valid())
+			continue;
+		raid6_2data_recov = (*ra)->data2;
+		raid6_datap_recov = (*ra)->datap;

-			/* Nuke syndromes */
-			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+		printf("using recovery %s\n", (*ra)->name);

-			/* Generate assumed good syndrome */
-			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-						(void **)&dataptrs);
+		for (algo = raid6_algos; *algo; algo++) {
+			if (!(*algo)->valid || (*algo)->valid()) {
+				raid6_call = **algo;

-			for (i = 0; i < NDISKS-1; i++)
-				for (j = i+1; j < NDISKS; j++)
-					err += test_disks(i, j);
+				/* Nuke syndromes */
+				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+				/* Generate assumed good syndrome */
+				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+							(void **)&dataptrs);
+
+				for (i = 0; i < NDISKS-1; i++)
+					for (j = i+1; j < NDISKS; j++)
+						err += test_disks(i, j);
+			}
 		}
+		printf("\n");
 	}
@@ -35,24 +35,29 @@ static inline void kernel_fpu_end(void)
 {
 }

+#define __aligned(x) __attribute__((aligned(x)))
+
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions
					    * (fast save and restore) */
 #define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
 #define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */

 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag >> 5) ? 0x80000001 : 1;
-	u32 edx;
+	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
+	u32 ecx, edx;

 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx)
-		     : : "ecx", "ebx");
+		     : "+a" (eax), "=d" (edx), "=c" (ecx)
+		     : : "ebx");

-	return (edx >> (flag & 31)) & 1;
+	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
 }

 #endif /* ndef __KERNEL__ */
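The rewritten boot_cpu_has() packs the CPUID routing into the flag value itself (flag = word*32 + bit, mirroring the kernel's cpufeature encoding): bit 5 of the word number selects the extended leaf, and word 4 (flag & 0x80) selects ECX rather than EDX. Two worked decodes:

    X86_FEATURE_SSSE3  = 4*32+9  = 0x89: flag & 0x20 == 0 -> leaf 1;
        flag & 0x80 != 0 -> ECX; flag & 31 == 9 -> CPUID.1:ECX bit 9.
    X86_FEATURE_MMXEXT = 1*32+22 = 0x36: flag & 0x20 != 0 -> leaf 0x80000001;
        flag & 0x80 == 0 -> EDX; flag & 31 == 22 -> bit 22 of EDX.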