From 37b23e0525d393d48a7d59f870b3bc061a30ccdb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 24 May 2011 17:11:30 -0700 Subject: [PATCH] x86,mm: make pagefault killable When an oom killing occurs, almost all processes are getting stuck at the following two points. 1) __alloc_pages_nodemask 2) __lock_page_or_retry 1) is not very problematic because TIF_MEMDIE leads to an allocation failure and getting out from page allocator. 2) is more problematic. In an OOM situation, zones typically don't have page cache at all and memory starvation might lead to greatly reduced IO performance. When a fork bomb occurs, TIF_MEMDIE tasks don't die quickly, meaning that a fork bomb may create new process quickly rather than the oom-killer killing it. Then, the system may become livelocked. This patch makes the pagefault interruptible by SIGKILL. Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Matthew Wilcox Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 12 +++++++++++- include/linux/mm.h | 1 + mm/filemap.c | 31 ++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bcb394dfbb35..f7a2a054a3c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -965,7 +965,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct mm_struct *mm; int fault; int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? FAULT_FLAG_WRITE : 0); tsk = current; @@ -1138,6 +1138,16 @@ good_area: return; } + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return; + } + /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely diff --git a/include/linux/mm.h b/include/linux/mm.h index 1746f67c33de..57d3d5fade16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -153,6 +153,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ +#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is diff --git a/mm/filemap.c b/mm/filemap.c index dea8a38bb2bb..8144f87dcbb4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -654,15 +654,32 @@ EXPORT_SYMBOL_GPL(__lock_page_killable); int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { - if (!(flags & FAULT_FLAG_ALLOW_RETRY)) { - __lock_page(page); - return 1; - } else { - if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) { - up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * CAUTION! In this case, mmap_sem is not released + * even though return 0. + */ + if (flags & FAULT_FLAG_RETRY_NOWAIT) + return 0; + + up_read(&mm->mmap_sem); + if (flags & FAULT_FLAG_KILLABLE) + wait_on_page_locked_killable(page); + else wait_on_page_locked(page); - } return 0; + } else { + if (flags & FAULT_FLAG_KILLABLE) { + int ret; + + ret = __lock_page_killable(page); + if (ret) { + up_read(&mm->mmap_sem); + return 0; + } + } else + __lock_page(page); + return 1; } }