mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-04 19:54:03 +08:00
ksm: make !merge_across_nodes migration safe
The new KSM NUMA merge_across_nodes knob introduces a problem, when it's set to non-default 0: if a KSM page is migrated to a different NUMA node, how do we migrate its stable node to the right tree? And what if that collides with an existing stable node? ksm_migrate_page() can do no more than it's already doing, updating stable_node->kpfn: the stable tree itself cannot be manipulated without holding ksm_thread_mutex. So accept that a stable tree may temporarily indicate a page belonging to the wrong NUMA node, leave updating until the next pass of ksmd, just be careful not to merge other pages on to a misplaced page. Note nid of holding tree in stable_node, and recognize that it will not always match nid of kpfn. A misplaced KSM page is discovered, either when ksm_do_scan() next comes around to one of its rmap_items (we now have to go to cmp_and_merge_page even on pages in a stable tree), or when stable_tree_search() arrives at a matching node for another page, and this node page is found misplaced. In each case, move the misplaced stable_node to a list of migrate_nodes (and use the address of migrate_nodes as magic by which to identify them): we don't need them in a tree. If stable_tree_search() finds no match for a page, but it's currently exiled to this list, then slot its stable_node right there into the tree, bringing all of its mappings with it; otherwise they get migrated one by one to the original page of the colliding node. stable_tree_search() is now modelled more like stable_tree_insert(), in order to handle these insertions of migrated nodes. remove_node_from_stable_tree(), remove_all_stable_nodes() and ksm_check_stable_tree() have to handle the migrate_nodes list as well as the stable tree itself. Less obviously, we do need to prune the list of stale entries from time to time (scan_get_next_rmap_item() does it once each full scan): whereas stale nodes in the stable tree get naturally pruned as searches try to brush past them, these migrate_nodes may get forgotten and accumulate. Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Petr Holasek <pholasek@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Izik Eidus <izik.eidus@ravellosystems.com> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
c8d6553b95
commit
4146d2d673
164
mm/ksm.c
164
mm/ksm.c
@ -122,13 +122,25 @@ struct ksm_scan {
|
|||||||
/**
|
/**
|
||||||
* struct stable_node - node of the stable rbtree
|
* struct stable_node - node of the stable rbtree
|
||||||
* @node: rb node of this ksm page in the stable tree
|
* @node: rb node of this ksm page in the stable tree
|
||||||
|
* @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
|
||||||
|
* @list: linked into migrate_nodes, pending placement in the proper node tree
|
||||||
* @hlist: hlist head of rmap_items using this ksm page
|
* @hlist: hlist head of rmap_items using this ksm page
|
||||||
* @kpfn: page frame number of this ksm page
|
* @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
|
||||||
|
* @nid: NUMA node id of stable tree in which linked (may not match kpfn)
|
||||||
*/
|
*/
|
||||||
struct stable_node {
|
struct stable_node {
|
||||||
struct rb_node node;
|
union {
|
||||||
|
struct rb_node node; /* when node of stable tree */
|
||||||
|
struct { /* when listed for migration */
|
||||||
|
struct list_head *head;
|
||||||
|
struct list_head list;
|
||||||
|
};
|
||||||
|
};
|
||||||
struct hlist_head hlist;
|
struct hlist_head hlist;
|
||||||
unsigned long kpfn;
|
unsigned long kpfn;
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
int nid;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -169,6 +181,9 @@ struct rmap_item {
|
|||||||
static struct rb_root root_unstable_tree[MAX_NUMNODES];
|
static struct rb_root root_unstable_tree[MAX_NUMNODES];
|
||||||
static struct rb_root root_stable_tree[MAX_NUMNODES];
|
static struct rb_root root_stable_tree[MAX_NUMNODES];
|
||||||
|
|
||||||
|
/* Recently migrated nodes of stable tree, pending proper placement */
|
||||||
|
static LIST_HEAD(migrate_nodes);
|
||||||
|
|
||||||
#define MM_SLOTS_HASH_BITS 10
|
#define MM_SLOTS_HASH_BITS 10
|
||||||
static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
|
static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
|
||||||
|
|
||||||
@ -311,11 +326,6 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
|
|||||||
hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
|
hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int in_stable_tree(struct rmap_item *rmap_item)
|
|
||||||
{
|
|
||||||
return rmap_item->address & STABLE_FLAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
|
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
|
||||||
* page tables after it has passed through ksm_exit() - which, if necessary,
|
* page tables after it has passed through ksm_exit() - which, if necessary,
|
||||||
@ -476,7 +486,6 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
|
|||||||
{
|
{
|
||||||
struct rmap_item *rmap_item;
|
struct rmap_item *rmap_item;
|
||||||
struct hlist_node *hlist;
|
struct hlist_node *hlist;
|
||||||
int nid;
|
|
||||||
|
|
||||||
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
|
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
|
||||||
if (rmap_item->hlist.next)
|
if (rmap_item->hlist.next)
|
||||||
@ -488,8 +497,11 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
|
|
||||||
nid = get_kpfn_nid(stable_node->kpfn);
|
if (stable_node->head == &migrate_nodes)
|
||||||
rb_erase(&stable_node->node, &root_stable_tree[nid]);
|
list_del(&stable_node->list);
|
||||||
|
else
|
||||||
|
rb_erase(&stable_node->node,
|
||||||
|
&root_stable_tree[NUMA(stable_node->nid)]);
|
||||||
free_stable_node(stable_node);
|
free_stable_node(stable_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -712,6 +724,7 @@ static int remove_stable_node(struct stable_node *stable_node)
|
|||||||
static int remove_all_stable_nodes(void)
|
static int remove_all_stable_nodes(void)
|
||||||
{
|
{
|
||||||
struct stable_node *stable_node;
|
struct stable_node *stable_node;
|
||||||
|
struct list_head *this, *next;
|
||||||
int nid;
|
int nid;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
@ -726,6 +739,12 @@ static int remove_all_stable_nodes(void)
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
list_for_each_safe(this, next, &migrate_nodes) {
|
||||||
|
stable_node = list_entry(this, struct stable_node, list);
|
||||||
|
if (remove_stable_node(stable_node))
|
||||||
|
err = -EBUSY;
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1113,25 +1132,30 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
|
|||||||
*/
|
*/
|
||||||
static struct page *stable_tree_search(struct page *page)
|
static struct page *stable_tree_search(struct page *page)
|
||||||
{
|
{
|
||||||
struct rb_node *node;
|
|
||||||
struct stable_node *stable_node;
|
|
||||||
int nid;
|
int nid;
|
||||||
|
struct rb_node **new;
|
||||||
|
struct rb_node *parent;
|
||||||
|
struct stable_node *stable_node;
|
||||||
|
struct stable_node *page_node;
|
||||||
|
|
||||||
stable_node = page_stable_node(page);
|
page_node = page_stable_node(page);
|
||||||
if (stable_node) { /* ksm page forked */
|
if (page_node && page_node->head != &migrate_nodes) {
|
||||||
|
/* ksm page forked */
|
||||||
get_page(page);
|
get_page(page);
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
nid = get_kpfn_nid(page_to_pfn(page));
|
nid = get_kpfn_nid(page_to_pfn(page));
|
||||||
node = root_stable_tree[nid].rb_node;
|
again:
|
||||||
|
new = &root_stable_tree[nid].rb_node;
|
||||||
|
parent = NULL;
|
||||||
|
|
||||||
while (node) {
|
while (*new) {
|
||||||
struct page *tree_page;
|
struct page *tree_page;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
stable_node = rb_entry(node, struct stable_node, node);
|
stable_node = rb_entry(*new, struct stable_node, node);
|
||||||
tree_page = get_ksm_page(stable_node, false);
|
tree_page = get_ksm_page(stable_node, false);
|
||||||
if (!tree_page)
|
if (!tree_page)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1139,10 +1163,11 @@ static struct page *stable_tree_search(struct page *page)
|
|||||||
ret = memcmp_pages(page, tree_page);
|
ret = memcmp_pages(page, tree_page);
|
||||||
put_page(tree_page);
|
put_page(tree_page);
|
||||||
|
|
||||||
|
parent = *new;
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
node = node->rb_left;
|
new = &parent->rb_left;
|
||||||
else if (ret > 0)
|
else if (ret > 0)
|
||||||
node = node->rb_right;
|
new = &parent->rb_right;
|
||||||
else {
|
else {
|
||||||
/*
|
/*
|
||||||
* Lock and unlock the stable_node's page (which
|
* Lock and unlock the stable_node's page (which
|
||||||
@ -1152,13 +1177,49 @@ static struct page *stable_tree_search(struct page *page)
|
|||||||
* than kpage, but that involves more changes.
|
* than kpage, but that involves more changes.
|
||||||
*/
|
*/
|
||||||
tree_page = get_ksm_page(stable_node, true);
|
tree_page = get_ksm_page(stable_node, true);
|
||||||
if (tree_page)
|
if (tree_page) {
|
||||||
unlock_page(tree_page);
|
unlock_page(tree_page);
|
||||||
return tree_page;
|
if (get_kpfn_nid(stable_node->kpfn) !=
|
||||||
|
NUMA(stable_node->nid)) {
|
||||||
|
put_page(tree_page);
|
||||||
|
goto replace;
|
||||||
|
}
|
||||||
|
return tree_page;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* There is now a place for page_node, but the tree may
|
||||||
|
* have been rebalanced, so re-evaluate parent and new.
|
||||||
|
*/
|
||||||
|
if (page_node)
|
||||||
|
goto again;
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
if (!page_node)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
list_del(&page_node->list);
|
||||||
|
DO_NUMA(page_node->nid = nid);
|
||||||
|
rb_link_node(&page_node->node, parent, new);
|
||||||
|
rb_insert_color(&page_node->node, &root_stable_tree[nid]);
|
||||||
|
get_page(page);
|
||||||
|
return page;
|
||||||
|
|
||||||
|
replace:
|
||||||
|
if (page_node) {
|
||||||
|
list_del(&page_node->list);
|
||||||
|
DO_NUMA(page_node->nid = nid);
|
||||||
|
rb_replace_node(&stable_node->node,
|
||||||
|
&page_node->node, &root_stable_tree[nid]);
|
||||||
|
get_page(page);
|
||||||
|
} else {
|
||||||
|
rb_erase(&stable_node->node, &root_stable_tree[nid]);
|
||||||
|
page = NULL;
|
||||||
|
}
|
||||||
|
stable_node->head = &migrate_nodes;
|
||||||
|
list_add(&stable_node->list, stable_node->head);
|
||||||
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1215,6 +1276,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
|
|||||||
INIT_HLIST_HEAD(&stable_node->hlist);
|
INIT_HLIST_HEAD(&stable_node->hlist);
|
||||||
stable_node->kpfn = kpfn;
|
stable_node->kpfn = kpfn;
|
||||||
set_page_stable_node(kpage, stable_node);
|
set_page_stable_node(kpage, stable_node);
|
||||||
|
DO_NUMA(stable_node->nid = nid);
|
||||||
rb_link_node(&stable_node->node, parent, new);
|
rb_link_node(&stable_node->node, parent, new);
|
||||||
rb_insert_color(&stable_node->node, &root_stable_tree[nid]);
|
rb_insert_color(&stable_node->node, &root_stable_tree[nid]);
|
||||||
|
|
||||||
@ -1311,11 +1373,6 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
|
|||||||
static void stable_tree_append(struct rmap_item *rmap_item,
|
static void stable_tree_append(struct rmap_item *rmap_item,
|
||||||
struct stable_node *stable_node)
|
struct stable_node *stable_node)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* Usually rmap_item->nid is already set correctly,
|
|
||||||
* but it may be wrong after switching merge_across_nodes.
|
|
||||||
*/
|
|
||||||
DO_NUMA(rmap_item->nid = get_kpfn_nid(stable_node->kpfn));
|
|
||||||
rmap_item->head = stable_node;
|
rmap_item->head = stable_node;
|
||||||
rmap_item->address |= STABLE_FLAG;
|
rmap_item->address |= STABLE_FLAG;
|
||||||
hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
|
hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
|
||||||
@ -1344,10 +1401,29 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
|
|||||||
unsigned int checksum;
|
unsigned int checksum;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
remove_rmap_item_from_tree(rmap_item);
|
stable_node = page_stable_node(page);
|
||||||
|
if (stable_node) {
|
||||||
|
if (stable_node->head != &migrate_nodes &&
|
||||||
|
get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
|
||||||
|
rb_erase(&stable_node->node,
|
||||||
|
&root_stable_tree[NUMA(stable_node->nid)]);
|
||||||
|
stable_node->head = &migrate_nodes;
|
||||||
|
list_add(&stable_node->list, stable_node->head);
|
||||||
|
}
|
||||||
|
if (stable_node->head != &migrate_nodes &&
|
||||||
|
rmap_item->head == stable_node)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* We first start with searching the page inside the stable tree */
|
/* We first start with searching the page inside the stable tree */
|
||||||
kpage = stable_tree_search(page);
|
kpage = stable_tree_search(page);
|
||||||
|
if (kpage == page && rmap_item->head == stable_node) {
|
||||||
|
put_page(kpage);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_rmap_item_from_tree(rmap_item);
|
||||||
|
|
||||||
if (kpage) {
|
if (kpage) {
|
||||||
err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
|
err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
@ -1464,6 +1540,27 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
|
|||||||
*/
|
*/
|
||||||
lru_add_drain_all();
|
lru_add_drain_all();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Whereas stale stable_nodes on the stable_tree itself
|
||||||
|
* get pruned in the regular course of stable_tree_search(),
|
||||||
|
* those moved out to the migrate_nodes list can accumulate:
|
||||||
|
* so prune them once before each full scan.
|
||||||
|
*/
|
||||||
|
if (!ksm_merge_across_nodes) {
|
||||||
|
struct stable_node *stable_node;
|
||||||
|
struct list_head *this, *next;
|
||||||
|
struct page *page;
|
||||||
|
|
||||||
|
list_for_each_safe(this, next, &migrate_nodes) {
|
||||||
|
stable_node = list_entry(this,
|
||||||
|
struct stable_node, list);
|
||||||
|
page = get_ksm_page(stable_node, false);
|
||||||
|
if (page)
|
||||||
|
put_page(page);
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (nid = 0; nid < nr_node_ids; nid++)
|
for (nid = 0; nid < nr_node_ids; nid++)
|
||||||
root_unstable_tree[nid] = RB_ROOT;
|
root_unstable_tree[nid] = RB_ROOT;
|
||||||
|
|
||||||
@ -1586,8 +1683,7 @@ static void ksm_do_scan(unsigned int scan_npages)
|
|||||||
rmap_item = scan_get_next_rmap_item(&page);
|
rmap_item = scan_get_next_rmap_item(&page);
|
||||||
if (!rmap_item)
|
if (!rmap_item)
|
||||||
return;
|
return;
|
||||||
if (!PageKsm(page) || !in_stable_tree(rmap_item))
|
cmp_and_merge_page(page, rmap_item);
|
||||||
cmp_and_merge_page(page, rmap_item);
|
|
||||||
put_page(page);
|
put_page(page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1964,6 +2060,7 @@ static void ksm_check_stable_tree(unsigned long start_pfn,
|
|||||||
unsigned long end_pfn)
|
unsigned long end_pfn)
|
||||||
{
|
{
|
||||||
struct stable_node *stable_node;
|
struct stable_node *stable_node;
|
||||||
|
struct list_head *this, *next;
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
int nid;
|
int nid;
|
||||||
|
|
||||||
@ -1984,6 +2081,13 @@ static void ksm_check_stable_tree(unsigned long start_pfn,
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
list_for_each_safe(this, next, &migrate_nodes) {
|
||||||
|
stable_node = list_entry(this, struct stable_node, list);
|
||||||
|
if (stable_node->kpfn >= start_pfn &&
|
||||||
|
stable_node->kpfn < end_pfn)
|
||||||
|
remove_node_from_stable_tree(stable_node);
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ksm_memory_callback(struct notifier_block *self,
|
static int ksm_memory_callback(struct notifier_block *self,
|
||||||
|
Loading…
Reference in New Issue
Block a user