mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
libnvdimm for 5.19
- Add support for clearing memory error via pwrite(2) on DAX - Fix 'security overwrite' support in the presence of media errors - Miscellaneous cleanups and fixes for nfit_test (nvdimm unit tests) -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCYpFPcQAKCRDfioYZHlFs Z9A3AQCdfoT5sY3OK+I/3oTvJ//6lw2MtXrnXFM046ICKPi9sgD8CzR9mRAHA+vj kxOtJEU2bA9naninXGORsDUndiNkwQo= =gVIn -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm and DAX updates from Dan Williams: "New support for clearing memory errors when a file is in DAX mode, alongside with some other fixes and cleanups. Previously it was only possible to clear these errors using a truncate or hole-punch operation to trigger the filesystem to reallocate the block, now, any page aligned write can opportunistically clear errors as well. This change spans x86/mm, nvdimm, and fs/dax, and has received the appropriate sign-offs. Thanks to Jane for her work on this. Summary: - Add support for clearing memory error via pwrite(2) on DAX - Fix 'security overwrite' support in the presence of media errors - Miscellaneous cleanups and fixes for nfit_test (nvdimm unit tests)" * tag 'libnvdimm-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: pmem: implement pmem_recovery_write() pmem: refactor pmem_clear_poison() dax: add .recovery_write dax_operation dax: introduce DAX_RECOVERY_WRITE dax access mode mce: fix set_mce_nospec to always unmap the whole page x86/mce: relocate set{clear}_mce_nospec() functions acpi/nfit: rely on mce->misc to determine poison granularity testing: nvdimm: asm/mce.h is not needed in nfit.c testing: nvdimm: iomap: make __nfit_test_ioremap a macro nvdimm: Allow overwrite in the presence of disabled dimms tools/testing/nvdimm: remove unneeded flush_workqueue
This commit is contained in:
commit
35cdd8656e
@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page);
|
||||
|
||||
extern int kernel_set_to_readonly;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Prevent speculative access to the page by either unmapping
|
||||
* it (if we do not require access to any part of the page) or
|
||||
* marking it uncacheable (if we want to try to retrieve data
|
||||
* from non-poisoned lines in the page).
|
||||
*/
|
||||
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
|
||||
{
|
||||
unsigned long decoy_addr;
|
||||
int rc;
|
||||
|
||||
/* SGX pages are not in the 1:1 map */
|
||||
if (arch_is_platform_page(pfn << PAGE_SHIFT))
|
||||
return 0;
|
||||
/*
|
||||
* We would like to just call:
|
||||
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
|
||||
* but doing that would radically increase the odds of a
|
||||
* speculative access to the poison page because we'd have
|
||||
* the virtual address of the kernel 1:1 mapping sitting
|
||||
* around in registers.
|
||||
* Instead we get tricky. We create a non-canonical address
|
||||
* that looks just like the one we want, but has bit 63 flipped.
|
||||
* This relies on set_memory_XX() properly sanitizing any __pa()
|
||||
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
|
||||
*/
|
||||
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
|
||||
|
||||
if (unmap)
|
||||
rc = set_memory_np(decoy_addr, 1);
|
||||
else
|
||||
rc = set_memory_uc(decoy_addr, 1);
|
||||
if (rc)
|
||||
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
|
||||
return rc;
|
||||
}
|
||||
#define set_mce_nospec set_mce_nospec
|
||||
|
||||
/* Restore full speculative operation to the pfn. */
|
||||
/*
 * clear_mce_nospec - mark the 1:1 mapping of @pfn write-back again
 * @pfn: page frame number whose kernel mapping is restored
 *
 * Returns the result of set_memory_wb() on that single page.
 */
static inline int clear_mce_nospec(unsigned long pfn)
{
	unsigned long linear_va = (unsigned long)pfn_to_kaddr(pfn);

	return set_memory_wb(linear_va, 1);
}
#define clear_mce_nospec clear_mce_nospec
|
||||
#else
|
||||
/*
|
||||
* Few people would run a 32-bit kernel on a machine that supports
|
||||
* recoverable errors because they have too much memory to boot 32-bit.
|
||||
*/
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_SET_MEMORY_H */
|
||||
|
@ -581,7 +581,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
|
||||
|
||||
pfn = mce->addr >> PAGE_SHIFT;
|
||||
if (!memory_failure(pfn, 0)) {
|
||||
set_mce_nospec(pfn, whole_page(mce));
|
||||
set_mce_nospec(pfn);
|
||||
mce->kflags |= MCE_HANDLED_UC;
|
||||
}
|
||||
|
||||
@ -1318,7 +1318,7 @@ static void kill_me_maybe(struct callback_head *cb)
|
||||
|
||||
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
|
||||
if (!ret) {
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
|
||||
sync_core();
|
||||
return;
|
||||
}
|
||||
@ -1344,7 +1344,7 @@ static void kill_me_never(struct callback_head *cb)
|
||||
p->mce_count = 0;
|
||||
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
|
||||
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/cc_platform.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/e820/api.h>
|
||||
#include <asm/processor.h>
|
||||
@ -29,7 +30,6 @@
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/memtype.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/mshyperv.h>
|
||||
|
||||
@ -1805,7 +1805,7 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages,
|
||||
}
|
||||
|
||||
/*
|
||||
* _set_memory_prot is an internal helper for callers that have been passed
|
||||
* __set_memory_prot is an internal helper for callers that have been passed
|
||||
* a pgprot_t value from upper layers and a reservation has already been taken.
|
||||
* If you want to set the pgprot to a specific page protocol, use the
|
||||
* set_memory_xx() functions.
|
||||
@ -1914,6 +1914,51 @@ int set_memory_wb(unsigned long addr, int numpages)
|
||||
}
|
||||
EXPORT_SYMBOL(set_memory_wb);
|
||||
|
||||
/* Prevent speculative access to a page by marking it not-present */
|
||||
#ifdef CONFIG_X86_64
|
||||
int set_mce_nospec(unsigned long pfn)
|
||||
{
|
||||
unsigned long decoy_addr;
|
||||
int rc;
|
||||
|
||||
/* SGX pages are not in the 1:1 map */
|
||||
if (arch_is_platform_page(pfn << PAGE_SHIFT))
|
||||
return 0;
|
||||
/*
|
||||
* We would like to just call:
|
||||
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
|
||||
* but doing that would radically increase the odds of a
|
||||
* speculative access to the poison page because we'd have
|
||||
* the virtual address of the kernel 1:1 mapping sitting
|
||||
* around in registers.
|
||||
* Instead we get tricky. We create a non-canonical address
|
||||
* that looks just like the one we want, but has bit 63 flipped.
|
||||
* This relies on set_memory_XX() properly sanitizing any __pa()
|
||||
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
|
||||
*/
|
||||
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
|
||||
|
||||
rc = set_memory_np(decoy_addr, 1);
|
||||
if (rc)
|
||||
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int set_memory_present(unsigned long *addr, int numpages)
|
||||
{
|
||||
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
|
||||
}
|
||||
|
||||
/* Restore full speculative operation to the pfn. */
|
||||
/*
 * clear_mce_nospec - make the 1:1 mapping of @pfn present again
 * @pfn: page frame number whose kernel mapping is restored
 *
 * Returns the result of set_memory_present() on that single page.
 */
int clear_mce_nospec(unsigned long pfn)
{
	unsigned long linear_va;

	linear_va = (unsigned long)pfn_to_kaddr(pfn);
	return set_memory_present(&linear_va, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
int set_memory_x(unsigned long addr, int numpages)
|
||||
{
|
||||
if (!(__supported_pte_mask & _PAGE_NX))
|
||||
|
@ -32,6 +32,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
|
||||
*/
|
||||
mutex_lock(&acpi_desc_lock);
|
||||
list_for_each_entry(acpi_desc, &acpi_descs, list) {
|
||||
unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc);
|
||||
struct device *dev = acpi_desc->dev;
|
||||
int found_match = 0;
|
||||
|
||||
@ -63,8 +64,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
|
||||
|
||||
/* If this fails due to an -ENOMEM, there is little we can do */
|
||||
nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
|
||||
ALIGN(mce->addr, L1_CACHE_BYTES),
|
||||
L1_CACHE_BYTES);
|
||||
ALIGN_DOWN(mce->addr, align), align);
|
||||
nvdimm_region_notify(nfit_spa->nd_region,
|
||||
NVDIMM_REVALIDATE_POISON);
|
||||
|
||||
|
@ -117,6 +117,7 @@ enum dax_device_flags {
|
||||
* @dax_dev: a dax_device instance representing the logical memory range
|
||||
* @pgoff: offset in pages from the start of the device to translate
|
||||
* @nr_pages: number of consecutive pages caller can handle relative to @pfn
|
||||
* @mode: indicator on normal access or recovery write
|
||||
* @kaddr: output parameter that returns a virtual address mapping of pfn
|
||||
* @pfn: output parameter that returns an absolute pfn translation of @pgoff
|
||||
*
|
||||
@ -124,7 +125,7 @@ enum dax_device_flags {
|
||||
* pages accessible at the device relative @pgoff.
|
||||
*/
|
||||
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
enum dax_access_mode mode, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
long avail;
|
||||
|
||||
@ -138,7 +139,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
|
||||
return -EINVAL;
|
||||
|
||||
avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
|
||||
kaddr, pfn);
|
||||
mode, kaddr, pfn);
|
||||
if (!avail)
|
||||
return -ERANGE;
|
||||
return min(avail, nr_pages);
|
||||
@ -194,6 +195,15 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
||||
|
||||
/*
 * dax_recovery_write - forward a recovery write to the device's driver
 * @dax_dev: dax device to write to
 * @pgoff:   page offset handed through to the driver
 * @addr:    destination address handed through to the driver
 * @bytes:   number of bytes requested
 * @iter:    source iterator handed through to the driver
 *
 * Returns whatever the driver's ->recovery_write() hook returns, or 0 when
 * the driver does not implement the hook.
 */
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *iter)
{
	const struct dax_operations *ops = dax_dev->ops;

	if (ops->recovery_write)
		return ops->recovery_write(dax_dev, pgoff, addr, bytes, iter);

	return 0;
}
EXPORT_SYMBOL_GPL(dax_recovery_write);
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_PMEM_API
|
||||
void arch_wb_cache_pmem(void *addr, size_t size);
|
||||
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
|
||||
|
@ -165,11 +165,12 @@ static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
|
||||
}
|
||||
|
||||
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
|
||||
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
|
||||
}
|
||||
|
||||
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
@ -180,9 +181,18 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||
}
|
||||
|
||||
/*
 * Recovery-write hook for the linear target: look up the backing dax
 * device and pass the request on to dax_recovery_write().
 */
static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *backing;

	/*
	 * pgoff is passed by address, so the lookup may rewrite it; it must
	 * complete before pgoff is read for the forwarded call.
	 */
	backing = linear_dax_pgoff(ti, &pgoff);

	return dax_recovery_write(backing, pgoff, addr, bytes, i);
}
|
||||
|
||||
#else
|
||||
#define linear_dax_direct_access NULL
|
||||
#define linear_dax_zero_page_range NULL
|
||||
#define linear_dax_recovery_write NULL
|
||||
#endif
|
||||
|
||||
static struct target_type linear_target = {
|
||||
@ -200,6 +210,7 @@ static struct target_type linear_target = {
|
||||
.iterate_devices = linear_iterate_devices,
|
||||
.direct_access = linear_dax_direct_access,
|
||||
.dax_zero_page_range = linear_dax_zero_page_range,
|
||||
.dax_recovery_write = linear_dax_recovery_write,
|
||||
};
|
||||
|
||||
int __init dm_linear_init(void)
|
||||
|
@ -888,11 +888,12 @@ static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
|
||||
}
|
||||
|
||||
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
|
||||
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
|
||||
}
|
||||
|
||||
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
@ -903,9 +904,18 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static size_t log_writes_dax_recovery_write(struct dm_target *ti,
|
||||
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
|
||||
|
||||
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
|
||||
}
|
||||
|
||||
#else
|
||||
#define log_writes_dax_direct_access NULL
|
||||
#define log_writes_dax_zero_page_range NULL
|
||||
#define log_writes_dax_recovery_write NULL
|
||||
#endif
|
||||
|
||||
static struct target_type log_writes_target = {
|
||||
@ -923,6 +933,7 @@ static struct target_type log_writes_target = {
|
||||
.io_hints = log_writes_io_hints,
|
||||
.direct_access = log_writes_dax_direct_access,
|
||||
.dax_zero_page_range = log_writes_dax_zero_page_range,
|
||||
.dax_recovery_write = log_writes_dax_recovery_write,
|
||||
};
|
||||
|
||||
static int __init dm_log_writes_init(void)
|
||||
|
@ -315,11 +315,12 @@ static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
|
||||
}
|
||||
|
||||
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
|
||||
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
|
||||
return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
|
||||
}
|
||||
|
||||
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
@ -330,9 +331,18 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||
}
|
||||
|
||||
/*
 * Recovery-write hook for the stripe target: look up the backing dax
 * device and pass the request on to dax_recovery_write().
 */
static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *backing;

	/*
	 * pgoff is passed by address, so the lookup may rewrite it; it must
	 * complete before pgoff is read for the forwarded call.
	 */
	backing = stripe_dax_pgoff(ti, &pgoff);

	return dax_recovery_write(backing, pgoff, addr, bytes, i);
}
|
||||
|
||||
#else
|
||||
#define stripe_dax_direct_access NULL
|
||||
#define stripe_dax_zero_page_range NULL
|
||||
#define stripe_dax_recovery_write NULL
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -469,6 +479,7 @@ static struct target_type stripe_target = {
|
||||
.io_hints = stripe_io_hints,
|
||||
.direct_access = stripe_dax_direct_access,
|
||||
.dax_zero_page_range = stripe_dax_zero_page_range,
|
||||
.dax_recovery_write = stripe_dax_recovery_write,
|
||||
};
|
||||
|
||||
int __init dm_stripe_init(void)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/dax.h>
|
||||
|
||||
#define DM_MSG_PREFIX "target"
|
||||
|
||||
@ -142,7 +143,8 @@ static void io_err_release_clone_rq(struct request *clone,
|
||||
}
|
||||
|
||||
static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -286,7 +286,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
|
||||
|
||||
id = dax_read_lock();
|
||||
|
||||
da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn);
|
||||
da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_ACCESS,
|
||||
&wc->memory_map, &pfn);
|
||||
if (da < 0) {
|
||||
wc->memory_map = NULL;
|
||||
r = da;
|
||||
@ -308,8 +309,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
|
||||
i = 0;
|
||||
do {
|
||||
long daa;
|
||||
daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i,
|
||||
NULL, &pfn);
|
||||
daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i,
|
||||
p - i, DAX_ACCESS, NULL, &pfn);
|
||||
if (daa <= 0) {
|
||||
r = daa ? daa : -EINVAL;
|
||||
goto err3;
|
||||
|
@ -1143,7 +1143,8 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
|
||||
}
|
||||
|
||||
static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
struct mapped_device *md = dax_get_private(dax_dev);
|
||||
sector_t sector = pgoff * PAGE_SECTORS;
|
||||
@ -1161,7 +1162,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
if (len < 1)
|
||||
goto out;
|
||||
nr_pages = min(len, nr_pages);
|
||||
ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
|
||||
ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn);
|
||||
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
@ -1196,6 +1197,25 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * dm_dax_recovery_write - route a dax recovery write to the live dm target
 * @dax_dev: dax device whose private data is the mapped_device
 * @pgoff:   page offset of the write; converted to a sector for the lookup
 * @addr:    destination address handed through to the target
 * @bytes:   number of bytes requested
 * @i:       source iterator handed through to the target
 *
 * Returns the value reported by the target's ->dax_recovery_write() hook,
 * or 0 when there is no live target for the sector or the target does not
 * implement the hook.  The live-table reference is dropped on every path.
 */
static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int srcu_idx;
	/* same type as the hook and this function return; no signed detour */
	size_t ret = 0;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (!ti || !ti->type->dax_recovery_write)
		goto out;

	ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
out:
	dm_put_live_table(md, srcu_idx);
	return ret;
}
|
||||
|
||||
/*
|
||||
* A target may call dm_accept_partial_bio only from the map routine. It is
|
||||
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
|
||||
@ -3231,6 +3251,7 @@ static const struct block_device_operations dm_rq_blk_dops = {
|
||||
static const struct dax_operations dm_dax_ops = {
|
||||
.direct_access = dm_dax_direct_access,
|
||||
.zero_page_range = dm_dax_zero_page_range,
|
||||
.recovery_write = dm_dax_recovery_write,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -45,9 +45,25 @@ static struct nd_region *to_region(struct pmem_device *pmem)
|
||||
return to_nd_region(to_dev(pmem)->parent);
|
||||
}
|
||||
|
||||
static void hwpoison_clear(struct pmem_device *pmem,
|
||||
phys_addr_t phys, unsigned int len)
|
||||
/* Translate a device-relative byte @offset into a physical address. */
static phys_addr_t to_phys(struct pmem_device *pmem, phys_addr_t offset)
{
	phys_addr_t phys = pmem->phys_addr + offset;

	return phys;
}
|
||||
|
||||
/*
 * Translate a device-relative byte @offset into a sector number counted
 * from the start of the data area (i.e. with data_offset subtracted).
 */
static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
{
	sector_t sect = (offset - pmem->data_offset) >> SECTOR_SHIFT;

	return sect;
}
|
||||
|
||||
/*
 * Translate a data-area @sector into a device-relative byte offset
 * (the inverse of to_sect(): data_offset is added back).
 */
static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
{
	phys_addr_t byte_off = (sector << SECTOR_SHIFT) + pmem->data_offset;

	return byte_off;
}
|
||||
|
||||
static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
|
||||
unsigned int len)
|
||||
{
|
||||
phys_addr_t phys = to_phys(pmem, offset);
|
||||
unsigned long pfn_start, pfn_end, pfn;
|
||||
|
||||
/* only pmem in the linear map supports HWPoison */
|
||||
@ -69,33 +85,40 @@ static void hwpoison_clear(struct pmem_device *pmem,
|
||||
}
|
||||
}
|
||||
|
||||
static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
|
||||
phys_addr_t offset, unsigned int len)
|
||||
static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
|
||||
{
|
||||
struct device *dev = to_dev(pmem);
|
||||
sector_t sector;
|
||||
long cleared;
|
||||
blk_status_t rc = BLK_STS_OK;
|
||||
|
||||
sector = (offset - pmem->data_offset) / 512;
|
||||
|
||||
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
|
||||
if (cleared < len)
|
||||
rc = BLK_STS_IOERR;
|
||||
if (cleared > 0 && cleared / 512) {
|
||||
hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
|
||||
cleared /= 512;
|
||||
dev_dbg(dev, "%#llx clear %ld sector%s\n",
|
||||
(unsigned long long) sector, cleared,
|
||||
cleared > 1 ? "s" : "");
|
||||
badblocks_clear(&pmem->bb, sector, cleared);
|
||||
if (blks == 0)
|
||||
return;
|
||||
badblocks_clear(&pmem->bb, sector, blks);
|
||||
if (pmem->bb_state)
|
||||
sysfs_notify_dirent(pmem->bb_state);
|
||||
}
|
||||
|
||||
arch_invalidate_pmem(pmem->virt_addr + offset, len);
|
||||
static long __pmem_clear_poison(struct pmem_device *pmem,
|
||||
phys_addr_t offset, unsigned int len)
|
||||
{
|
||||
phys_addr_t phys = to_phys(pmem, offset);
|
||||
long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);
|
||||
|
||||
return rc;
|
||||
if (cleared > 0) {
|
||||
pmem_mkpage_present(pmem, offset, cleared);
|
||||
arch_invalidate_pmem(pmem->virt_addr + offset, len);
|
||||
}
|
||||
return cleared;
|
||||
}
|
||||
|
||||
static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
|
||||
phys_addr_t offset, unsigned int len)
|
||||
{
|
||||
long cleared = __pmem_clear_poison(pmem, offset, len);
|
||||
|
||||
if (cleared < 0)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
|
||||
if (cleared < len)
|
||||
return BLK_STS_IOERR;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void write_pmem(void *pmem_addr, struct page *page,
|
||||
@ -143,7 +166,7 @@ static blk_status_t pmem_do_read(struct pmem_device *pmem,
|
||||
sector_t sector, unsigned int len)
|
||||
{
|
||||
blk_status_t rc;
|
||||
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
|
||||
phys_addr_t pmem_off = to_offset(pmem, sector);
|
||||
void *pmem_addr = pmem->virt_addr + pmem_off;
|
||||
|
||||
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
|
||||
@ -158,36 +181,20 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
|
||||
struct page *page, unsigned int page_off,
|
||||
sector_t sector, unsigned int len)
|
||||
{
|
||||
blk_status_t rc = BLK_STS_OK;
|
||||
bool bad_pmem = false;
|
||||
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
|
||||
phys_addr_t pmem_off = to_offset(pmem, sector);
|
||||
void *pmem_addr = pmem->virt_addr + pmem_off;
|
||||
|
||||
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
|
||||
bad_pmem = true;
|
||||
if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
|
||||
blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
|
||||
|
||||
/*
|
||||
* Note that we write the data both before and after
|
||||
* clearing poison. The write before clear poison
|
||||
* handles situations where the latest written data is
|
||||
* preserved and the clear poison operation simply marks
|
||||
* the address range as valid without changing the data.
|
||||
* In this case application software can assume that an
|
||||
* interrupted write will either return the new good
|
||||
* data or an error.
|
||||
*
|
||||
* However, if pmem_clear_poison() leaves the data in an
|
||||
* indeterminate state we need to perform the write
|
||||
* after clear poison.
|
||||
*/
|
||||
flush_dcache_page(page);
|
||||
write_pmem(pmem_addr, page, page_off, len);
|
||||
if (unlikely(bad_pmem)) {
|
||||
rc = pmem_clear_poison(pmem, pmem_off, len);
|
||||
write_pmem(pmem_addr, page, page_off, len);
|
||||
if (rc != BLK_STS_OK)
|
||||
return rc;
|
||||
}
|
||||
|
||||
return rc;
|
||||
flush_dcache_page(page);
|
||||
write_pmem(pmem_addr, page, page_off, len);
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void pmem_submit_bio(struct bio *bio)
|
||||
@ -255,24 +262,47 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
|
||||
|
||||
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
|
||||
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
|
||||
|
||||
if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
|
||||
PFN_PHYS(nr_pages))))
|
||||
return -EIO;
|
||||
sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
|
||||
unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
|
||||
struct badblocks *bb = &pmem->bb;
|
||||
sector_t first_bad;
|
||||
int num_bad;
|
||||
|
||||
if (kaddr)
|
||||
*kaddr = pmem->virt_addr + offset;
|
||||
if (pfn)
|
||||
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
|
||||
|
||||
if (bb->count &&
|
||||
badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
|
||||
long actual_nr;
|
||||
|
||||
if (mode != DAX_RECOVERY_WRITE)
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* If badblocks are present, limit known good range to the
|
||||
* requested range.
|
||||
* The recovery stride is set to kernel page size because
|
||||
* the underlying driver and firmware clear poison functions
|
||||
* don't appear to handle large chunks (such as 2MiB) reliably.
|
||||
*/
|
||||
if (unlikely(pmem->bb.count))
|
||||
actual_nr = PHYS_PFN(
|
||||
PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
|
||||
dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
|
||||
sector, nr_pages, first_bad, actual_nr);
|
||||
if (actual_nr)
|
||||
return actual_nr;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If badblocks are present but not in the range, limit known good range
|
||||
* to the requested range.
|
||||
*/
|
||||
if (bb->count)
|
||||
return nr_pages;
|
||||
return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
|
||||
}
|
||||
@ -294,16 +324,73 @@ static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
}
|
||||
|
||||
static long pmem_dax_direct_access(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct pmem_device *pmem = dax_get_private(dax_dev);
|
||||
|
||||
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
|
||||
return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
|
||||
}
|
||||
|
||||
/*
|
||||
* The recovery write thread started out as a normal pwrite thread and
|
||||
* when the filesystem was told about potential media error in the
|
||||
* range, filesystem turns the normal pwrite to a dax_recovery_write.
|
||||
*
|
||||
* The recovery write consists of clearing media poison, clearing page
|
||||
* HWPoison bit, reenable page-wide read-write permission, flush the
|
||||
* caches and finally write. A competing pread thread will be held
|
||||
* off during the recovery process since data read back might not be
|
||||
* valid, and this is achieved by clearing the badblock records after
|
||||
* the recovery write is complete. Competing recovery write threads
|
||||
* are already serialized by writer lock held by dax_iomap_rw().
|
||||
*/
|
||||
/*
 * pmem_recovery_write - write to a range that may contain known poison
 * @dax_dev: dax device whose private data is the pmem device
 * @pgoff:   page offset of the write within the device
 * @addr:    destination kernel address
 * @bytes:   number of bytes to copy
 * @i:       source iterator
 *
 * Returns the number of bytes copied, or 0 when the range holds poison
 * that could not be (fully) cleared or the request is not page aligned.
 */
static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);
	struct device *dev = pmem->bb.dev;
	size_t in_page = offset_in_page(addr);
	/* byte length of the page-rounded span covering the write */
	size_t span = PFN_PHYS(PFN_UP(in_page + bytes));
	phys_addr_t pmem_off;
	size_t copied;
	long cleared;

	/* No recorded bad blocks in the span: plain flushing copy. */
	if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, span))
		return _copy_from_iter_flushcache(addr, bytes, i);

	/*
	 * A range that is not page aligned cannot be recovered.  This
	 * should not happen unless something else went wrong.
	 */
	if (in_page || !PAGE_ALIGNED(bytes)) {
		dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
			addr, bytes);
		return 0;
	}

	pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
	cleared = __pmem_clear_poison(pmem, pmem_off, span);
	if (cleared < 0) {
		dev_dbg(dev, "poison clear failed: %ld\n", cleared);
		return 0;
	}
	if (cleared > 0 && cleared < span) {
		dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
			cleared, span);
		return 0;
	}

	/*
	 * Copy first and drop the badblocks records afterwards, so the
	 * range stays marked bad until the new data has been written.
	 */
	copied = _copy_from_iter_flushcache(addr, bytes, i);
	pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);

	return copied;
}
|
||||
|
||||
static const struct dax_operations pmem_dax_ops = {
|
||||
.direct_access = pmem_dax_direct_access,
|
||||
.zero_page_range = pmem_dax_zero_page_range,
|
||||
.recovery_write = pmem_recovery_write,
|
||||
};
|
||||
|
||||
static ssize_t write_cache_show(struct device *dev,
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
enum dax_access_mode;
|
||||
|
||||
/* this definition is in its own header for tools/testing/nvdimm to consume */
|
||||
struct pmem_device {
|
||||
/* One contiguous memory region per device */
|
||||
@ -28,7 +30,8 @@ struct pmem_device {
|
||||
};
|
||||
|
||||
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn);
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn);
|
||||
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
static inline bool test_and_clear_pmem_poison(struct page *page)
|
||||
|
@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
|
||||
|| !nvdimm->sec.flags)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (dev->driver == NULL) {
|
||||
dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rc = check_security_state(nvdimm);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
@ -32,7 +32,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
|
||||
static void dcssblk_release(struct gendisk *disk, fmode_t mode);
|
||||
static void dcssblk_submit_bio(struct bio *bio);
|
||||
static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn);
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn);
|
||||
|
||||
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
|
||||
|
||||
@ -50,7 +51,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
|
||||
long rc;
|
||||
void *kaddr;
|
||||
|
||||
rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
|
||||
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
|
||||
&kaddr, NULL);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
|
||||
@ -927,7 +929,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
|
||||
|
||||
static long
|
||||
dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev);
|
||||
|
||||
|
20
fs/dax.c
20
fs/dax.c
@ -722,7 +722,8 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
|
||||
int id;
|
||||
|
||||
id = dax_read_lock();
|
||||
rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, DAX_ACCESS,
|
||||
&kaddr, NULL);
|
||||
if (rc < 0) {
|
||||
dax_read_unlock(id);
|
||||
return rc;
|
||||
@ -939,7 +940,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
|
||||
|
||||
id = dax_read_lock();
|
||||
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
|
||||
NULL, pfnp);
|
||||
DAX_ACCESS, NULL, pfnp);
|
||||
if (length < 0) {
|
||||
rc = length;
|
||||
goto out;
|
||||
@ -1048,7 +1049,7 @@ static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *kaddr;
|
||||
long ret;
|
||||
|
||||
ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
|
||||
ret = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, NULL);
|
||||
if (ret > 0) {
|
||||
memset(kaddr + offset, 0, size);
|
||||
dax_flush(dax_dev, kaddr + offset, size);
|
||||
@ -1165,6 +1166,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
const size_t size = ALIGN(length + offset, PAGE_SIZE);
|
||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||
ssize_t map_len;
|
||||
bool recovery = false;
|
||||
void *kaddr;
|
||||
|
||||
if (fatal_signal_pending(current)) {
|
||||
@ -1173,7 +1175,14 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
}
|
||||
|
||||
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
|
||||
DAX_ACCESS, &kaddr, NULL);
|
||||
if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
|
||||
map_len = dax_direct_access(dax_dev, pgoff,
|
||||
PHYS_PFN(size), DAX_RECOVERY_WRITE,
|
||||
&kaddr, NULL);
|
||||
if (map_len > 0)
|
||||
recovery = true;
|
||||
}
|
||||
if (map_len < 0) {
|
||||
ret = map_len;
|
||||
break;
|
||||
@ -1185,7 +1194,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
||||
if (map_len > end - pos)
|
||||
map_len = end - pos;
|
||||
|
||||
if (iov_iter_rw(iter) == WRITE)
|
||||
if (recovery)
|
||||
xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
|
||||
map_len, iter);
|
||||
else if (iov_iter_rw(iter) == WRITE)
|
||||
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
|
||||
map_len, iter);
|
||||
else
|
||||
|
@ -1241,8 +1241,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
|
||||
INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
|
||||
|
||||
id = dax_read_lock();
|
||||
nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
|
||||
NULL);
|
||||
nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size),
|
||||
DAX_ACCESS, NULL, NULL);
|
||||
dax_read_unlock(id);
|
||||
if (nr_pages < 0) {
|
||||
pr_debug("dax_direct_access() returned %ld\n", nr_pages);
|
||||
|
@ -752,7 +752,8 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
|
||||
* offset.
|
||||
*/
|
||||
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct virtio_fs *fs = dax_get_private(dax_dev);
|
||||
phys_addr_t offset = PFN_PHYS(pgoff);
|
||||
@ -772,7 +773,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
|
||||
long rc;
|
||||
void *kaddr;
|
||||
|
||||
rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
|
||||
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
|
||||
NULL);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
|
||||
|
@ -14,6 +14,11 @@ struct iomap_ops;
|
||||
struct iomap_iter;
|
||||
struct iomap;
|
||||
|
||||
/*
 * Access mode handed to dax_direct_access() and to the ->direct_access()
 * dax operation.
 */
enum dax_access_mode {
|
||||
/* Mode passed by the regular mapping / zeroing callers. */
DAX_ACCESS,
|
||||
/*
 * Mode dax_iomap_iter() retries with when a DAX_ACCESS attempt for a
 * write returns -EIO.
 */
DAX_RECOVERY_WRITE,
|
||||
};
|
||||
|
||||
struct dax_operations {
|
||||
/*
|
||||
* direct_access: translate a device-relative
|
||||
@ -21,7 +26,7 @@ struct dax_operations {
|
||||
* number of pages available for DAX at that pfn.
|
||||
*/
|
||||
long (*direct_access)(struct dax_device *, pgoff_t, long,
|
||||
void **, pfn_t *);
|
||||
enum dax_access_mode, void **, pfn_t *);
|
||||
/*
|
||||
* Validate whether this device is usable as an fsdax backing
|
||||
* device.
|
||||
@ -30,6 +35,12 @@ struct dax_operations {
|
||||
sector_t, sector_t);
|
||||
/* zero_page_range: required operation. Zero page range */
|
||||
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
|
||||
/*
|
||||
* recovery_write: recover a poisoned range by DAX device driver
|
||||
* capable of clearing poison.
|
||||
*/
|
||||
size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *iter);
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_DAX)
|
||||
@ -40,6 +51,8 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc);
|
||||
bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
||||
bool dax_synchronous(struct dax_device *dax_dev);
|
||||
void set_dax_synchronous(struct dax_device *dax_dev);
|
||||
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i);
|
||||
/*
|
||||
* Check if given mapping is supported by the file / underlying device.
|
||||
*/
|
||||
@ -87,6 +100,11 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
||||
{
|
||||
return !(vma->vm_flags & VM_SYNC);
|
||||
}
|
||||
/*
 * Stub dax_recovery_write(): ignores all of its arguments and always
 * returns 0.
 * NOTE(review): appears to be the variant used when CONFIG_DAX is
 * disabled -- confirm against the surrounding #if/#else.
 */
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void set_dax_nocache(struct dax_device *dax_dev);
|
||||
@ -178,7 +196,7 @@ static inline void dax_read_unlock(int id)
|
||||
bool dax_alive(struct dax_device *dax_dev);
|
||||
void *dax_get_private(struct dax_device *dax_dev);
|
||||
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
|
||||
void **kaddr, pfn_t *pfn);
|
||||
enum dax_access_mode mode, void **kaddr, pfn_t *pfn);
|
||||
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
|
||||
size_t bytes, struct iov_iter *i);
|
||||
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
|
||||
|
@ -20,6 +20,7 @@ struct dm_table;
|
||||
struct dm_report_zones_args;
|
||||
struct mapped_device;
|
||||
struct bio_vec;
|
||||
enum dax_access_mode;
|
||||
|
||||
/*
|
||||
* Type of table, mapped_device's mempool and request_queue
|
||||
@ -146,10 +147,19 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
|
||||
* >= 0 : the number of bytes accessible at the address
|
||||
*/
|
||||
typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn);
|
||||
long nr_pages, enum dax_access_mode node, void **kaddr,
|
||||
pfn_t *pfn);
|
||||
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||
size_t nr_pages);
|
||||
|
||||
/*
|
||||
* Returns:
|
||||
* != 0 : number of bytes transferred
|
||||
* 0 : recovery write failed
|
||||
*/
|
||||
typedef size_t (*dm_dax_recovery_write_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i);
|
||||
|
||||
void dm_error(const char *message);
|
||||
|
||||
struct dm_dev {
|
||||
@ -199,6 +209,7 @@ struct target_type {
|
||||
dm_io_hints_fn io_hints;
|
||||
dm_dax_direct_access_fn direct_access;
|
||||
dm_dax_zero_page_range_fn dax_zero_page_range;
|
||||
dm_dax_recovery_write_fn dax_recovery_write;
|
||||
|
||||
/* For internal device-mapper use. */
|
||||
struct list_head list;
|
||||
|
@ -42,14 +42,14 @@ static inline bool can_set_direct_map(void)
|
||||
#endif
|
||||
#endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
|
||||
|
||||
#ifndef set_mce_nospec
|
||||
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
|
||||
#ifdef CONFIG_X86_64
|
||||
int set_mce_nospec(unsigned long pfn);
|
||||
int clear_mce_nospec(unsigned long pfn);
|
||||
#else
|
||||
/*
 * Stub set_mce_nospec(): does nothing with @pfn and returns 0.
 * NOTE(review): in this hunk it follows the #else of
 * "#ifdef CONFIG_X86_64", so it is presumably the non-x86-64 fallback --
 * confirm against the full header.
 */
static inline int set_mce_nospec(unsigned long pfn)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef clear_mce_nospec
|
||||
static inline int clear_mce_nospec(unsigned long pfn)
|
||||
{
|
||||
return 0;
|
||||
|
@ -4,11 +4,13 @@
|
||||
*/
|
||||
#include "test/nfit_test.h"
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/dax.h>
|
||||
#include <pmem.h>
|
||||
#include <nd.h>
|
||||
|
||||
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
long nr_pages, enum dax_access_mode mode, void **kaddr,
|
||||
pfn_t *pfn)
|
||||
{
|
||||
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
|
||||
|
||||
|
@ -62,16 +62,14 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
|
||||
}
|
||||
EXPORT_SYMBOL(get_nfit_res);
|
||||
|
||||
/*
 * Function form of __nfit_test_ioremap().
 *
 * Looks up @offset with get_nfit_res(). When a test resource is found,
 * returns that resource's buffer advanced by (@offset - res.start);
 * otherwise defers to @fallback_fn(@offset, @size).
 */
static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
|
||||
void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
|
||||
{
|
||||
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
|
||||
|
||||
if (nfit_res)
|
||||
/* Address inside the test buffer that corresponds to @offset. */
return (void __iomem *) nfit_res->buf + offset
|
||||
- nfit_res->res.start;
|
||||
return fallback_fn(offset, size);
|
||||
}
|
||||
/*
 * Macro form of __nfit_test_ioremap().
 *
 * Looks up @offset with get_nfit_res(). When a test resource is found,
 * the expression evaluates to that resource's buffer advanced by
 * (@offset - res.start); otherwise it evaluates to
 * fallback_fn(@offset, @size).
 *
 * Note: @offset is expanded more than once, so an argument with side
 * effects would be evaluated repeatedly.
 */
#define __nfit_test_ioremap(offset, size, fallback_fn) ({ \
|
||||
struct nfit_test_resource *nfit_res = get_nfit_res(offset); \
|
||||
nfit_res ? \
|
||||
(void __iomem *) nfit_res->buf + (offset) \
|
||||
- nfit_res->res.start \
|
||||
: \
|
||||
fallback_fn((offset), (size)) ; \
|
||||
})
|
||||
|
||||
void __iomem *__wrap_devm_ioremap(struct device *dev,
|
||||
resource_size_t offset, unsigned long size)
|
||||
|
@ -23,8 +23,6 @@
|
||||
#include "nfit_test.h"
|
||||
#include "../watermark.h"
|
||||
|
||||
#include <asm/mce.h>
|
||||
|
||||
/*
|
||||
* Generate an NFIT table to describe the following topology:
|
||||
*
|
||||
@ -3375,7 +3373,6 @@ static __exit void nfit_test_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
flush_workqueue(nfit_wq);
|
||||
destroy_workqueue(nfit_wq);
|
||||
for (i = 0; i < NUM_NFITS; i++)
|
||||
platform_device_unregister(&instances[i]->pdev);
|
||||
|
Loading…
Reference in New Issue
Block a user