From e1a1cd590e3fcb0d2e230128daf2337ea55387dc Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 7 Feb 2008 00:14:02 -0800 Subject: [PATCH] Memory controller: make charging gfp mask aware Nick Piggin pointed out that swap cache and page cache addition routines could be called from non GFP_KERNEL contexts. This patch makes the charging routine aware of the gfp context. Charging might fail if the cgroup is over it's limit, in which case a suitable error is returned. This patch was tested on a Powerpc box. I am still looking at being able to test the path, through which allocations happen in non GFP_KERNEL contexts. [kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE] Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Paul Menage Cc: Peter Zijlstra Cc: "Eric W. Biederman" Cc: Nick Piggin Cc: Kirill Korotaev Cc: Herbert Poetzl Cc: David Rientjes Cc: Vaidyanathan Srinivasan Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 ++++++++---- include/linux/swap.h | 3 ++- mm/filemap.c | 2 +- mm/memcontrol.c | 24 +++++++++++++++++------- mm/memory.c | 10 +++++----- mm/migrate.c | 2 +- mm/swap_state.c | 2 +- mm/swapfile.c | 2 +- mm/vmscan.c | 14 +++++--------- 9 files changed, 41 insertions(+), 30 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9d0a830423b6..cc0ad7191acd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm); extern void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc); extern struct page_cgroup *page_get_page_cgroup(struct page *page); -extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm); +extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); extern void mem_cgroup_uncharge(struct page_cgroup *pc); extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); +extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); static inline void mem_cgroup_uncharge_page(struct page *page) @@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) return NULL; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm) +static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { return 0; } @@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc, } static inline int mem_cgroup_cache_charge(struct page *page, - struct mm_struct *mm) + struct mm_struct *mm, + gfp_t gfp_mask) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 4d91bc0e0fd5..3ca5c4bd6d3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -183,7 +183,8 @@ extern void swap_setup(void); /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask); -extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem); +extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, + gfp_t gfp_mask); extern int __isolate_lru_page(struct page *page, int mode); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; diff --git a/mm/filemap.c b/mm/filemap.c index 8ae171cc2811..63040d5e0ae2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, if (error == 0) { - error = mem_cgroup_cache_charge(page, current->mm); + error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); if (error) goto out; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ff7cac602984..ac8774426fec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * 0 if the charge was successful * < 0 if the cgroup is over its limit */ -int mem_cgroup_charge(struct page *page, struct mm_struct *mm) +int mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { struct mem_cgroup *mem; struct page_cgroup *pc, *race_pc; @@ -293,7 +294,7 @@ retry: unlock_page_cgroup(page); - pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL); + pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); if (pc == NULL) goto err; @@ -320,7 +321,14 @@ retry: * the cgroup limit. */ while (res_counter_charge(&mem->res, PAGE_SIZE)) { - if (try_to_free_mem_cgroup_pages(mem)) + bool is_atomic = gfp_mask & GFP_ATOMIC; + /* + * We cannot reclaim under GFP_ATOMIC, fail the charge + */ + if (is_atomic) + goto noreclaim; + + if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) continue; /* @@ -344,9 +352,10 @@ retry: congestion_wait(WRITE, HZ/10); continue; } - +noreclaim: css_put(&mem->css); - mem_cgroup_out_of_memory(mem, GFP_KERNEL); + if (!is_atomic) + mem_cgroup_out_of_memory(mem, GFP_KERNEL); goto free_pc; } @@ -385,7 +394,8 @@ err: /* * See if the cached pages should be charged at all? */ -int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) +int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { struct mem_cgroup *mem; if (!mm) @@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) mem = rcu_dereference(mm->mem_cgroup); if (mem->control_type == MEM_CGROUP_TYPE_ALL) - return mem_cgroup_charge(page, mm); + return mem_cgroup_charge(page, mm, gfp_mask); else return 0; } diff --git a/mm/memory.c b/mm/memory.c index 0ba224ea6ba4..153a54b2013c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa pte_t *pte; spinlock_t *ptl; - retval = mem_cgroup_charge(page, mm); + retval = mem_cgroup_charge(page, mm, GFP_KERNEL); if (retval) goto out; @@ -1650,7 +1650,7 @@ gotten: cow_user_page(new_page, old_page, address, vma); __SetPageUptodate(new_page); - if (mem_cgroup_charge(new_page, mm)) + if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; /* @@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(PGMAJFAULT); } - if (mem_cgroup_charge(page, mm)) { + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ret = VM_FAULT_OOM; goto out; @@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, goto oom; __SetPageUptodate(page); - if (mem_cgroup_charge(page, mm)) + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, } - if (mem_cgroup_charge(page, mm)) { + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { ret = VM_FAULT_OOM; goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index 417bbda14e5b..763794144697 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma, return; } - if (mem_cgroup_charge(new, mm)) { + if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { pte_unmap(ptep); return; } diff --git a/mm/swap_state.c b/mm/swap_state.c index 88258869c8e7..581b609e748d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) error = radix_tree_preload(gfp_mask); if (!error) { - error = mem_cgroup_cache_charge(page, current->mm); + error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); if (error) goto out; diff --git a/mm/swapfile.c b/mm/swapfile.c index fddc4cc4149b..35e00c3d0286 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free) static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, unsigned long addr, swp_entry_t entry, struct page *page) { - if (mem_cgroup_charge(page, vma->vm_mm)) + if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) return -ENOMEM; inc_mm_counter(vma->vm_mm, anon_rss); diff --git a/mm/vmscan.c b/mm/vmscan.c index 215f6a726b2f..b7d868cbca09 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) #ifdef CONFIG_CGROUP_MEM_CONT -#ifdef CONFIG_HIGHMEM -#define ZONE_USERPAGES ZONE_HIGHMEM -#else -#define ZONE_USERPAGES ZONE_NORMAL -#endif - -unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, + gfp_t gfp_mask) { struct scan_control sc = { - .gfp_mask = GFP_KERNEL, + .gfp_mask = gfp_mask, .may_writepage = !laptop_mode, .may_swap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, @@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) }; int node; struct zone **zones; + int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE); for_each_online_node(node) { - zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones; + zones = NODE_DATA(node)->node_zonelists[target_zone].zones; if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) return 1; }