mm: introduce mf_dax_kill_procs() for fsdax case

This new function is a variant of mf_generic_kill_procs() that accepts a
(file, offset) pair instead of a struct, to support multiple files sharing
a DAX mapping.  It is intended to be called by file systems as part of
the memory_failure handler, after the file system has performed a reverse
mapping from the storage address to the file and file offset.

Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Shiyang Ruan 2022-06-03 13:37:29 +08:00 committed by akpm
parent 2f437effc6
commit c36e202495
2 changed files with 88 additions and 10 deletions

View File

@ -3178,6 +3178,8 @@ enum mf_flags {
MF_UNPOISON = 1 << 4,
MF_SW_SIMULATED = 1 << 5,
};
/*
 * Collect and kill the processes that are using @count pages of the file
 * behind @mapping, starting at page offset @index.
 */
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);

View File

@ -297,10 +297,9 @@ void shake_page(struct page *p)
}
EXPORT_SYMBOL_GPL(shake_page);
static unsigned long dev_pagemap_mapping_shift(struct page *page,
struct vm_area_struct *vma)
static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
unsigned long address)
{
unsigned long address = vma_address(page, vma);
unsigned long ret = 0;
pgd_t *pgd;
p4d_t *p4d;
@ -340,10 +339,14 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
/*
* Schedule a process for later kill.
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
*
* Note: @fsdax_pgoff is used only when @p is a fsdax page.
* In other cases, such as anonymous and file-backed pages, the address to be
* killed can be calculated from @p itself.
*/
static void add_to_kill(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill)
pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
struct list_head *to_kill)
{
struct to_kill *tk;
@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
}
tk->addr = page_address_in_vma(p, vma);
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
if (is_zone_device_page(p)) {
/*
* Since page->mapping is not used for fsdax, we need to
* calculate the address based on the vma.
*/
if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
} else
tk->size_shift = page_shift(compound_head(p));
/*
@ -505,7 +514,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (!page_mapped_in_vma(page, vma))
continue;
if (vma->vm_mm == t->mm)
add_to_kill(t, page, vma, to_kill);
add_to_kill(t, page, 0, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
@ -541,13 +550,41 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
* to be informed of all such data corruptions.
*/
if (vma->vm_mm == t->mm)
add_to_kill(t, page, vma, to_kill);
add_to_kill(t, page, 0, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
i_mmap_unlock_read(mapping);
}
#ifdef CONFIG_FS_DAX
/*
 * Gather the tasks to be killed for an error that hit a fsdax page.
 *
 * Every process is visited.  Whenever task_early_kill() hands back a task
 * for it, each vma in @mapping's i_mmap interval tree that covers @pgoff
 * and shares that task's mm is passed to add_to_kill() together with
 * @page and @pgoff.
 *
 * The walk runs with the mapping's i_mmap lock and tasklist_lock both held
 * for reading; they are dropped in reverse order before returning.
 */
static void collect_procs_fsdax(struct page *page,
		struct address_space *mapping, pgoff_t pgoff,
		struct list_head *to_kill)
{
	struct task_struct *proc;
	struct vm_area_struct *area;

	i_mmap_lock_read(mapping);
	read_lock(&tasklist_lock);

	for_each_process(proc) {
		struct task_struct *victim = task_early_kill(proc, true);

		if (victim) {
			vma_interval_tree_foreach(area, &mapping->i_mmap,
						  pgoff, pgoff) {
				/* Only vmas that belong to this task's mm. */
				if (area->vm_mm != victim->mm)
					continue;
				add_to_kill(victim, page, pgoff, area,
					    to_kill);
			}
		}
	}

	read_unlock(&tasklist_lock);
	i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */
/*
* Collect the processes who have the corrupted page mapped to kill.
*/
@ -1588,6 +1625,45 @@ unlock:
return rc;
}
#ifdef CONFIG_FS_DAX
/**
 * mf_dax_kill_procs - Collect and kill processes who are using this file range
 * @mapping:	address_space of the file in use
 * @index:	start pgoff of the range within the file
 * @count:	length of the range, in unit of PAGE_SIZE
 * @mf_flags:	memory failure flags
 *
 * MF_ACTION_REQUIRED and MF_MUST_KILL are always OR-ed into @mf_flags.
 * Each page offset in [@index, @index + @count) is handled on its own:
 * the DAX mapping entry is locked, and if a page was returned for it the
 * page is marked HWPoison, the tasks mapping it are collected and
 * unmap_and_kill() is invoked; the entry is then unlocked again.
 *
 * Return: 0 after the whole range has been walked, or -EBUSY as soon as
 * dax_lock_mapping_entry() returns no cookie for an offset (offsets
 * handled before that point are not undone).
 */
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
		unsigned long count, int mf_flags)
{
	size_t stop = index + count;
	LIST_HEAD(to_kill);

	mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;

	while (index < stop) {
		struct page *page = NULL;
		dax_entry_t cookie;

		cookie = dax_lock_mapping_entry(mapping, index, &page);
		if (!cookie)
			return -EBUSY;

		/* Offsets for which no page came back are only unlocked. */
		if (page) {
			SetPageHWPoison(page);

			collect_procs_fsdax(page, mapping, index, &to_kill);
			unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
				       index, mf_flags);
		}

		dax_unlock_mapping_entry(mapping, index, cookie);
		index++;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */
/*
* Called from hugetlb code with hugetlb_lock held.
*