c12cd77cb0
Commit 3ee48b6af4 ("mm, x86: Saving vmcore with non-lazy freeing of vmas") introduced set_iounmap_nonlazy(), which sets vmap_lazy_nr to lazy_max_pages() + 1, ensuring that any future vunmaps() immediately purge the vmap areas instead of doing it lazily.

Commit 690467c81b ("mm/vmalloc: Move draining areas out of caller context") moved the purging from the vunmap() caller to a worker thread. Unfortunately, set_iounmap_nonlazy() can cause the worker thread to spin (possibly forever). For example, consider the following scenario:

1. A thread reads from /proc/vmcore. This eventually calls __copy_oldmem_page() -> set_iounmap_nonlazy(), which sets vmap_lazy_nr to lazy_max_pages() + 1.
2. The thread then calls free_vmap_area_noflush() (via iounmap()), which adds 2 pages (one page plus the guard page) to the purge list and to vmap_lazy_nr. vmap_lazy_nr is now lazy_max_pages() + 3, so drain_vmap_work is scheduled.
3. The thread returns from the kernel and is scheduled out.
4. The worker thread is scheduled in and calls drain_vmap_area_work(). It frees the 2 pages on the purge list. vmap_lazy_nr is now lazy_max_pages() + 1.
5. This is still over the threshold, so the worker tries to purge areas again, but doesn't find anything.
6. Repeat step 5.

If the system is running with only one CPU (which is typical for kdump) and preemption is disabled, this never makes forward progress: there aren't any more pages to purge, so the system hangs. If there is more than one CPU or preemption is enabled, the worker thread spins forever in the background. (Note that if there were already pages to be purged at the time that set_iounmap_nonlazy() was called, this bug is avoided.)

This can be reproduced with anything that reads from /proc/vmcore multiple times, e.g., vmcore-dmesg /proc/vmcore.

It turns out that improvements to vmap() over the years have obsoleted the need for this "optimization". I benchmarked `dd if=/proc/vmcore of=/dev/null` with 4k and 1M read sizes on a system with a 32GB vmcore. The test was run on 5.17, on 5.18-rc1 with a fix that avoided the hang, and on 5.18-rc1 with set_iounmap_nonlazy() removed entirely:

      |  5.17|5.18+fix|5.18+removal
    4k|40.86s|  40.09s|      26.73s
    1M|24.47s|  23.98s|      21.84s

The removal was the fastest (by a wide margin with 4k reads). This patch removes set_iounmap_nonlazy().

Link: https://lkml.kernel.org/r/52f819991051f9b865e9ce25605509bfdbacadcd.1649277321.git.osandov@fb.com
Fixes: 690467c81b ("mm/vmalloc: Move draining areas out of caller context")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Acked-by: Chris Down <chris@chrisdown.name>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
79 lines
2.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 *	Memory preserving reboot related code.
 *
 *	Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
 *	Copyright (C) IBM Corporation, 2004. All rights reserved
 */

#include <linux/errno.h>
#include <linux/crash_dump.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/cc_platform.h>

static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
				  unsigned long offset, int userbuf,
				  bool encrypted)
{
	void *vaddr;

	if (!csize)
		return 0;

	if (encrypted)
		vaddr = (__force void *)ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
	else
		vaddr = (__force void *)ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);

	if (!vaddr)
		return -ENOMEM;

	if (userbuf) {
		if (copy_to_user((void __user *)buf, vaddr + offset, csize)) {
			iounmap((void __iomem *)vaddr);
			return -EFAULT;
		}
	} else
		memcpy(buf, vaddr + offset, csize);

	iounmap((void __iomem *)vaddr);
	return csize;
}

/**
 * copy_oldmem_page - copy one page of memory
 * @pfn: page frame number to be copied
 * @buf: target memory address for the copy; this can be in kernel address
 *	space or user address space (see @userbuf)
 * @csize: number of bytes to copy
 * @offset: offset in bytes into the page (based on pfn) to begin the copy
 * @userbuf: if set, @buf is in user address space, use copy_to_user(),
 *	otherwise @buf is in kernel address space, use memcpy().
 *
 * Copy a page from the old kernel's memory. For this page, there is no pte
 * mapped in the current kernel. We stitch up a pte, similar to kmap_atomic.
 */
ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
			 unsigned long offset, int userbuf)
{
	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, false);
}

/**
 * copy_oldmem_page_encrypted - same as copy_oldmem_page() above but ioremap the
 * memory with the encryption mask set to accommodate kdump on SME-enabled
 * machines.
 */
ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
				   unsigned long offset, int userbuf)
{
	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true);
}

ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
	return read_from_oldmem(buf, count, ppos, 0,
				cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT));
}
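For reference, copy_oldmem_page() is consumed by the /proc/vmcore read path. The sketch below illustrates the calling convention documented in the kernel-doc above by walking an old-memory range page by page; copy_old_range() is a hypothetical helper name, and the loop only approximates the pattern of read_from_oldmem() in fs/proc/vmcore.c rather than reproducing it.

/*
 * Illustrative only: a hypothetical caller that copies "count" bytes
 * starting at old-memory position *ppos into buf, one page at a time.
 * Approximates the read_from_oldmem() pattern; not the real code.
 */
static ssize_t copy_old_range(char *buf, size_t count, u64 *ppos, int userbuf)
{
	unsigned long pfn, offset;
	size_t nr_bytes, read = 0;
	ssize_t tmp;

	offset = (unsigned long)(*ppos % PAGE_SIZE);
	pfn = (unsigned long)(*ppos / PAGE_SIZE);

	while (count) {
		/* Never let a single call cross a page boundary. */
		nr_bytes = min_t(size_t, count, PAGE_SIZE - offset);

		tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf);
		if (tmp < 0)
			return tmp;

		*ppos += nr_bytes;
		buf += nr_bytes;
		read += nr_bytes;
		count -= nr_bytes;
		++pfn;
		offset = 0;	/* pages after the first start at offset 0 */
	}

	return read;
}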