mm: munlock: batch non-THP page isolation and munlock+putback using pagevec
Currently, munlock_vma_range() calls munlock_vma_page() on each page in a loop, which results in repeated taking and releasing of the lru_lock spinlock to isolate pages one by one. This patch batches the munlock operations using an on-stack pagevec, so that isolation is done under a single lru_lock. For THP pages, the old behavior is preserved, as they might be split while being put into the pagevec. After this patch, a 9% speedup was measured for munlocking a 56GB memory area with THP disabled.

A new function __munlock_pagevec() is introduced that takes a pagevec and:

1) clears PageMlocked and isolates all pages under lru_lock. Zone page stats can also be updated using the variant that assumes disabled interrupts;

2) finishes the munlock and lru putback on all pages under their lock_page.

Note that previously, lock_page also covered the PageMlocked clearing and page isolation, but it is not needed for those operations.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Jörn Engel <joern@logfs.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 586a32ac1d
commit 7225522bb4
mm/mlock.c | 196
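The batching idea in this patch can be seen in isolation: instead of taking zone->lru_lock once per page, pages are gathered into a small on-stack array (a pagevec, PAGEVEC_SIZE entries, 14 at the time of this patch) and the lock is taken once per full batch; the per-page work that does not need the lock is then finished outside it. The following is a minimal userspace sketch of that pattern only, not kernel code; batch_flush(), BATCH_SIZE and the printf placeholders are invented for the illustration.

/* Illustrative only: models the two-phase pagevec batching pattern in userspace. */
#include <pthread.h>
#include <stdio.h>

#define BATCH_SIZE 14	/* PAGEVEC_SIZE is 14 at the time of this patch */

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

struct batch {
	int count;
	int items[BATCH_SIZE];
};

static void batch_flush(struct batch *b)
{
	int i;

	/* Phase 1: the lock-protected part, one lock round trip per batch. */
	pthread_mutex_lock(&lru_lock);
	for (i = 0; i < b->count; i++)
		printf("isolate item %d\n", b->items[i]);	/* stands in for isolation */
	pthread_mutex_unlock(&lru_lock);

	/* Phase 2: per-item work that needs no lru_lock. */
	for (i = 0; i < b->count; i++)
		printf("finish item %d\n", b->items[i]);	/* stands in for munlock + putback */

	b->count = 0;	/* like pagevec_reinit() */
}

int main(void)
{
	struct batch b = { 0 };
	int item;

	for (item = 0; item < 100; item++) {
		b.items[b.count++] = item;
		if (b.count == BATCH_SIZE)	/* like pagevec_add() returning 0 */
			batch_flush(&b);
	}
	if (b.count)	/* flush the final partial batch */
		batch_flush(&b);
	return 0;
}

Keeping the batch small bounds the lru_lock hold time, which is also why the kernel side flushes the pagevec early when the zone changes or when a THP page is encountered.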
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -11,6 +11,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
@@ -18,6 +19,8 @@
 #include <linux/rmap.h>
 #include <linux/mmzone.h>
 #include <linux/hugetlb.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 
 #include "internal.h"
 
@@ -87,6 +90,47 @@ void mlock_vma_page(struct page *page)
 	}
 }
 
+/*
+ * Finish munlock after successful page isolation
+ *
+ * Page must be locked. This is a wrapper for try_to_munlock()
+ * and putback_lru_page() with munlock accounting.
+ */
+static void __munlock_isolated_page(struct page *page)
+{
+	int ret = SWAP_AGAIN;
+
+	/*
+	 * Optimization: if the page was mapped just once, that's our mapping
+	 * and we don't need to check all the other vmas.
+	 */
+	if (page_mapcount(page) > 1)
+		ret = try_to_munlock(page);
+
+	/* Did try_to_unlock() succeed or punt? */
+	if (ret != SWAP_MLOCK)
+		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+	putback_lru_page(page);
+}
+
+/*
+ * Accounting for page isolation fail during munlock
+ *
+ * Performs accounting when page isolation fails in munlock. There is nothing
+ * else to do because it means some other task has already removed the page
+ * from the LRU. putback_lru_page() will take care of removing the page from
+ * the unevictable list, if necessary. vmscan [page_referenced()] will move
+ * the page back to the unevictable list if some other vma has it mlocked.
+ */
+static void __munlock_isolation_failed(struct page *page)
+{
+	if (PageUnevictable(page))
+		count_vm_event(UNEVICTABLE_PGSTRANDED);
+	else
+		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+}
+
 /**
  * munlock_vma_page - munlock a vma page
  * @page - page to be unlocked
@@ -112,37 +156,10 @@ unsigned int munlock_vma_page(struct page *page)
 		unsigned int nr_pages = hpage_nr_pages(page);
 		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		page_mask = nr_pages - 1;
-		if (!isolate_lru_page(page)) {
-			int ret = SWAP_AGAIN;
-
-			/*
-			 * Optimization: if the page was mapped just once,
-			 * that's our mapping and we don't need to check all the
-			 * other vmas.
-			 */
-			if (page_mapcount(page) > 1)
-				ret = try_to_munlock(page);
-			/*
-			 * did try_to_unlock() succeed or punt?
-			 */
-			if (ret != SWAP_MLOCK)
-				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-
-			putback_lru_page(page);
-		} else {
-			/*
-			 * Some other task has removed the page from the LRU.
-			 * putback_lru_page() will take care of removing the
-			 * page from the unevictable list, if necessary.
-			 * vmscan [page_referenced()] will move the page back
-			 * to the unevictable list if some other vma has it
-			 * mlocked.
-			 */
-			if (PageUnevictable(page))
-				count_vm_event(UNEVICTABLE_PGSTRANDED);
-			else
-				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-		}
+		if (!isolate_lru_page(page))
+			__munlock_isolated_page(page);
+		else
+			__munlock_isolation_failed(page);
 	}
 
 	return page_mask;
@@ -209,6 +226,73 @@ static int __mlock_posix_error_return(long retval)
 	return retval;
 }
 
+/*
+ * Munlock a batch of pages from the same zone
+ *
+ * The work is split to two main phases. First phase clears the Mlocked flag
+ * and attempts to isolate the pages, all under a single zone lru lock.
+ * The second phase finishes the munlock only for pages where isolation
+ * succeeded.
+ *
+ * Note that pvec is modified during the process. Before returning
+ * pagevec_reinit() is called on it.
+ */
+static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+{
+	int i;
+	int nr = pagevec_count(pvec);
+
+	/* Phase 1: page isolation */
+	spin_lock_irq(&zone->lru_lock);
+	for (i = 0; i < nr; i++) {
+		struct page *page = pvec->pages[i];
+
+		if (TestClearPageMlocked(page)) {
+			struct lruvec *lruvec;
+			int lru;
+
+			/* we have disabled interrupts */
+			__mod_zone_page_state(zone, NR_MLOCK, -1);
+
+			if (PageLRU(page)) {
+				lruvec = mem_cgroup_page_lruvec(page, zone);
+				lru = page_lru(page);
+
+				get_page(page);
+				ClearPageLRU(page);
+				del_page_from_lru_list(page, lruvec, lru);
+			} else {
+				__munlock_isolation_failed(page);
+				goto skip_munlock;
+			}
+
+		} else {
+skip_munlock:
+			/*
+			 * We won't be munlocking this page in the next phase
+			 * but we still need to release the follow_page_mask()
+			 * pin.
+			 */
+			pvec->pages[i] = NULL;
+			put_page(page);
+		}
+	}
+	spin_unlock_irq(&zone->lru_lock);
+
+	/* Phase 2: page munlock and putback */
+	for (i = 0; i < nr; i++) {
+		struct page *page = pvec->pages[i];
+
+		if (page) {
+			lock_page(page);
+			__munlock_isolated_page(page);
+			unlock_page(page);
+			put_page(page); /* pin from follow_page_mask() */
+		}
+	}
+	pagevec_reinit(pvec);
+}
+
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -230,11 +314,16 @@
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
+	struct pagevec pvec;
+	struct zone *zone = NULL;
+
+	pagevec_init(&pvec, 0);
 	vma->vm_flags &= ~VM_LOCKED;
 
 	while (start < end) {
 		struct page *page;
 		unsigned int page_mask, page_increm;
+		struct zone *pagezone;
 
 		/*
 		 * Although FOLL_DUMP is intended for get_dump_page(),
@@ -246,20 +335,47 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
 					&page_mask);
 		if (page && !IS_ERR(page)) {
-			lock_page(page);
-			/*
-			 * Any THP page found by follow_page_mask() may have
-			 * gotten split before reaching munlock_vma_page(),
-			 * so we need to recompute the page_mask here.
-			 */
-			page_mask = munlock_vma_page(page);
-			unlock_page(page);
-			put_page(page);
+			pagezone = page_zone(page);
+			/* The whole pagevec must be in the same zone */
+			if (pagezone != zone) {
+				if (pagevec_count(&pvec))
+					__munlock_pagevec(&pvec, zone);
+				zone = pagezone;
+			}
+			if (PageTransHuge(page)) {
+				/*
+				 * THP pages are not handled by pagevec due
+				 * to their possible split (see below).
+				 */
+				if (pagevec_count(&pvec))
+					__munlock_pagevec(&pvec, zone);
+				lock_page(page);
+				/*
+				 * Any THP page found by follow_page_mask() may
+				 * have gotten split before reaching
+				 * munlock_vma_page(), so we need to recompute
+				 * the page_mask here.
+				 */
+				page_mask = munlock_vma_page(page);
+				unlock_page(page);
+				put_page(page); /* follow_page_mask() */
+			} else {
+				/*
+				 * Non-huge pages are handled in batches
+				 * via pagevec. The pin from
+				 * follow_page_mask() prevents them from
+				 * collapsing by THP.
+				 */
+				if (pagevec_add(&pvec, page) == 0)
+					__munlock_pagevec(&pvec, zone);
+			}
 		}
 		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
 		start += page_increm * PAGE_SIZE;
 		cond_resched();
 	}
+	if (pagevec_count(&pvec))
+		__munlock_pagevec(&pvec, zone);
 }
 
 /*