diff --git a/fs/exec.c b/fs/exec.c index e6e94c626c2c..9badbc0bfb1d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -242,9 +242,10 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ + BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS; + vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); @@ -616,6 +617,7 @@ int setup_arg_pages(struct linux_binprm *bprm, else if (executable_stack == EXSTACK_DISABLE_X) vm_flags &= ~VM_EXEC; vm_flags |= mm->def_flags; + vm_flags |= VM_STACK_INCOMPLETE_SETUP; ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, vm_flags); @@ -630,6 +632,9 @@ int setup_arg_pages(struct linux_binprm *bprm, goto out_unlock; } + /* mprotect_fixup is overkill to remove the temporary stack flags */ + vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; + stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ stack_size = vma->vm_end - vma->vm_start; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index fb19bb92b809..98ea5bab963e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -106,6 +106,9 @@ extern unsigned int kobjsize(const void *objp); #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +/* Bits set in the VMA until the stack is in its final location */ +#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif diff --git a/mm/rmap.c b/mm/rmap.c index b5c320f7d0a5..38a336e2eea1 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1131,6 +1131,20 @@ static int try_to_unmap_cluster(unsigned long 
cursor, unsigned int *mapcount, return ret; } +static bool is_vma_temporary_stack(struct vm_area_struct *vma) +{ + int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); + + if (!maybe_stack) + return false; + + if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == + VM_STACK_INCOMPLETE_SETUP) + return true; + + return false; +} + /** * try_to_unmap_anon - unmap or unlock anonymous page using the object-based * rmap method @@ -1159,7 +1173,21 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { struct vm_area_struct *vma = avc->vma; - unsigned long address = vma_address(page, vma); + unsigned long address; + + /* + * During exec, a temporary VMA is set up and later moved. + * The VMA is moved under the anon_vma lock but the page + * tables are not, leading to a race where migration cannot + * find the migration ptes. Rather than increasing the + * locking requirements of exec(), migration skips + * temporary VMAs until after exec() completes. + */ + if (PAGE_MIGRATION && (flags & TTU_MIGRATION) && + is_vma_temporary_stack(vma)) + continue; + + address = vma_address(page, vma); if (address == -EFAULT) continue; ret = try_to_unmap_one(page, vma, address, flags);