Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton:

 - a few hotfixes
 - various misc updates
 - ocfs2 updates
 - most of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (108 commits)
  mm, memory_hotplug: move movable_node to the hotplug proper
  mm, memory_hotplug: drop CONFIG_MOVABLE_NODE
  mm, memory_hotplug: drop artificial restriction on online/offline
  mm: memcontrol: account slab stats per lruvec
  mm: memcontrol: per-lruvec stats infrastructure
  mm: memcontrol: use generic mod_memcg_page_state for kmem pages
  mm: memcontrol: use the node-native slab memory counters
  mm: vmstat: move slab statistics from zone to node counters
  mm/zswap.c: delete an error message for a failed memory allocation in zswap_dstmem_prepare()
  mm/zswap.c: improve a size determination in zswap_frontswap_init()
  mm/zswap.c: delete an error message for a failed memory allocation in zswap_pool_create()
  mm/swapfile.c: sort swap entries before free
  mm/oom_kill: count global and memory cgroup oom kills
  mm: per-cgroup memory reclaim stats
  mm: kmemleak: treat vm_struct as alternative reference to vmalloc'ed objects
  mm: kmemleak: factor object reference updating out of scan_block()
  mm: kmemleak: slightly reduce the size of some structures on 64-bit architectures
  mm, mempolicy: don't check cpuset seqlock where it doesn't matter
  mm, cpuset: always use seqlock when changing task's nodemask
  mm, mempolicy: simplify rebinding mempolicies when updating cpusets
  ...
commit 9f45efb928
@@ -2315,8 +2315,11 @@
                        that the amount of memory usable for all allocations
                        is not too small.

        movable_node    [KNL] Boot-time switch to enable the effects
                        of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.
        movable_node    [KNL] Boot-time switch to make hotpluggable memory
                        NUMA nodes movable. This means that the memory
                        of such nodes will be usable only for movable
                        allocations, which rules out almost all kernel
                        allocations. Use with caution!

        MTD_Partition=  [MTD]
                        Format: <name>,<region-number>,<size>,<offset>
@@ -3772,8 +3775,14 @@
        slab_nomerge    [MM]
                        Disable merging of slabs with similar size. May be
                        necessary if there is some reason to distinguish
                        allocs to different slabs. Debug options disable
                        merging on their own.
                        allocs to different slabs, especially in hardened
                        environments where the risk of heap overflows and
                        layout control by attackers can usually be
                        frustrated by disabling merging. This will reduce
                        most of the exposure of a heap attack to a single
                        cache (risks via metadata attacks are mostly
                        unchanged). Debug options disable merging on their
                        own.
                        For more information see Documentation/vm/slub.txt.

        slab_max_order= [MM, SLAB]
@@ -852,13 +852,25 @@ PAGE_SIZE multiple when read back.

                The number of times the cgroup's memory usage was
                about to go over the max boundary. If direct reclaim
                fails to bring it down, the OOM killer is invoked.
                fails to bring it down, the cgroup goes to OOM state.

          oom

                The number of times the OOM killer has been invoked in
                the cgroup. This may not exactly match the number of
                processes killed but should generally be close.
                The number of times the cgroup's memory usage reached
                the limit and allocation was about to fail.

                Depending on context, the result could be invocation of the
                OOM killer and retrying the allocation, or a failing allocation.

                A failed allocation in its turn could be returned into
                userspace as -ENOMEM or silently ignored in cases like
                disk readahead. For now, OOM in memory cgroup kills
                tasks iff shortage has happened inside page fault.

          oom_kill

                The number of processes belonging to this cgroup
                killed by any kind of OOM killer.

  memory.stat
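Since memory.events is a flat keyed file, the new "oom" and "oom_kill" counters can be polled from userspace. Below is a minimal illustrative sketch (not part of this patch set); the cgroup path "mygroup" is an assumption made only for the example.

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Example path; substitute the cgroup you want to inspect. */
        const char *path = "/sys/fs/cgroup/mygroup/memory.events";
        char key[64];
        unsigned long long val;
        FILE *f = fopen(path, "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* memory.events contains one "<key> <count>" pair per line. */
        while (fscanf(f, "%63s %llu", key, &val) == 2) {
                if (!strcmp(key, "oom") || !strcmp(key, "oom_kill"))
                        printf("%s = %llu\n", key, val);
        }
        fclose(f);
        return 0;
}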
@@ -956,6 +968,34 @@ PAGE_SIZE multiple when read back.

                Number of times a shadow node has been reclaimed

          pgrefill

                Amount of scanned pages (in an active LRU list)

          pgscan

                Amount of scanned pages (in an inactive LRU list)

          pgsteal

                Amount of reclaimed pages

          pgactivate

                Amount of pages moved to the active LRU list

          pgdeactivate

                Amount of pages moved to the inactive LRU list

          pglazyfree

                Amount of pages postponed to be freed under memory pressure

          pglazyfreed

                Amount of reclaimed lazyfree pages

  memory.swap.current

        A read-only single value file which exists on non-root
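With pgscan and pgsteal now accounted per cgroup, a per-group reclaim efficiency can be derived as pgsteal/pgscan. A hedged sketch follows (not from the commit; the cgroup path "mygroup" is hypothetical).

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Hypothetical cgroup path, used only for illustration. */
        const char *path = "/sys/fs/cgroup/mygroup/memory.stat";
        char key[64];
        unsigned long long val, pgscan = 0, pgsteal = 0;
        FILE *f = fopen(path, "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fscanf(f, "%63s %llu", key, &val) == 2) {
                if (!strcmp(key, "pgscan"))
                        pgscan = val;
                else if (!strcmp(key, "pgsteal"))
                        pgsteal = val;
        }
        fclose(f);

        /* pgsteal/pgscan approximates how much of the scanned LRU was reclaimed. */
        if (pgscan)
                printf("reclaim efficiency: %.1f%%\n", 100.0 * pgsteal / pgscan);
        else
                printf("nothing scanned in this cgroup yet\n");
        return 0;
}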
@@ -150,6 +150,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
- ``kmemleak_init``          - initialize kmemleak
- ``kmemleak_alloc``         - notify of a memory block allocation
- ``kmemleak_alloc_percpu``  - notify of a percpu memory block allocation
- ``kmemleak_vmalloc``       - notify of a vmalloc() memory allocation
- ``kmemleak_free``          - notify of a memory block freeing
- ``kmemleak_free_part``     - notify of a partial memory block freeing
- ``kmemleak_free_percpu``   - notify of a percpu memory block freeing
@@ -98,6 +98,50 @@ use_zero_pages - specifies whether empty pages (i.e. allocated pages
                   it is only effective for pages merged after the change.
                   Default: 0 (normal KSM behaviour as in earlier releases)

max_page_sharing - Maximum sharing allowed for each KSM page. This
                   enforces a deduplication limit to avoid the virtual
                   memory rmap lists growing too large. The minimum
                   value is 2, as a newly created KSM page will have at
                   least two sharers. The rmap walk has O(N)
                   complexity where N is the number of rmap_items
                   (i.e. virtual mappings) that are sharing the page,
                   which is in turn capped by max_page_sharing. So
                   this effectively spreads the linear O(N)
                   computational complexity from rmap walk context
                   over different KSM pages. The ksmd walk over the
                   stable_node "chains" is also O(N), but N is the
                   number of stable_node "dups", not the number of
                   rmap_items, so it does not have a significant impact on
                   ksmd performance. In practice the best stable_node
                   "dup" candidate will be kept and found at the head
                   of the "dups" list. The higher this value, the
                   faster KSM will merge the memory (because there
                   will be fewer stable_node dups queued into the
                   stable_node chain->hlist to check for pruning) and
                   the higher the deduplication factor will be, but
                   the slower the worst-case rmap walk could be for
                   any given KSM page. Slowing down the rmap walk
                   means there will be higher latency for certain
                   virtual memory operations happening during
                   swapping, compaction, NUMA balancing and page
                   migration, in turn decreasing responsiveness for
                   the caller of those virtual memory operations. The
                   scheduler latency of other tasks not involved with
                   the VM operations doing the rmap walk is not
                   affected by this parameter, as the rmap walks are
                   always schedule-friendly themselves.

stable_node_chains_prune_millisecs - How frequently to walk the whole
                   list of stable_node "dups" linked in the
                   stable_node "chains" in order to prune stale
                   stable_nodes. Smaller millisecs values will free
                   up the KSM metadata with lower latency, but they
                   will make ksmd use more CPU during the scan. This
                   only applies to the stable_node chains, so it is a
                   no-op if not a single KSM page has hit
                   max_page_sharing yet (there would be no stable_node
                   chains in that case).

The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:

pages_shared - how many shared pages are being used
@@ -106,10 +150,29 @@ pages_unshared - how many pages unique but repeatedly checked for merging
pages_volatile - how many pages changing too fast to be placed in a tree
full_scans - how many times all mergeable areas have been scanned

stable_node_chains - number of stable node chains allocated, this is
                     effectively the number of KSM pages that hit the
                     max_page_sharing limit
stable_node_dups - number of stable node dups queued into the
                   stable_node chains

A high ratio of pages_sharing to pages_shared indicates good sharing, but
a high ratio of pages_unshared to pages_sharing indicates wasted effort.
pages_volatile embraces several different kinds of activity, but a high
proportion there would also indicate poor use of madvise MADV_MERGEABLE.

The maximum possible pages_sharing/pages_shared ratio is limited by the
max_page_sharing tunable. To increase the ratio, max_page_sharing must
be increased accordingly.

The stable_node_dups/stable_node_chains ratio is also affected by the
max_page_sharing tunable, and a high ratio may indicate fragmentation
in the stable_node dups. This could be solved by introducing
fragmentation algorithms in ksmd which would refile rmap_items from
one stable_node dup to another stable_node dup, in order to free up
stable_node "dups" with few rmap_items in them, but that may increase
the ksmd CPU usage and possibly slow down the read-only computations on
the KSM pages of the applications.
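For illustration only (not part of the commit), the following userspace sketch exercises the interfaces described above: it maps identical anonymous pages, registers them with madvise(MADV_MERGEABLE), and then reads the pages_* and stable_node_* counters from /sys/kernel/mm/ksm/. It assumes CONFIG_KSM is enabled and ksmd is running (run=1); the 64-page size and 5-second wait are arbitrary choices for the example.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static unsigned long read_ksm(const char *name)
{
        char path[128];
        unsigned long val = 0;
        FILE *f;

        snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", name);
        f = fopen(path, "r");
        if (f) {
                if (fscanf(f, "%lu", &val) != 1)
                        val = 0;
                fclose(f);
        }
        return val;
}

int main(void)
{
        size_t page = (size_t)sysconf(_SC_PAGESIZE);
        size_t len = 64 * page;
        char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        memset(buf, 0x5a, len);                 /* identical content in every page */

        if (madvise(buf, len, MADV_MERGEABLE))  /* hand the range to ksmd */
                perror("madvise(MADV_MERGEABLE)");

        sleep(5);                               /* let ksmd do a few scan passes */

        printf("pages_shared:       %lu\n", read_ksm("pages_shared"));
        printf("pages_sharing:      %lu\n", read_ksm("pages_sharing"));
        printf("stable_node_chains: %lu\n", read_ksm("stable_node_chains"));
        printf("stable_node_dups:   %lu\n", read_ksm("stable_node_dups"));
        printf("max_page_sharing:   %lu\n", read_ksm("max_page_sharing"));
        return 0;
}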

Izik Eidus,
Hugh Dickins, 17 Nov 2009

@ -13,7 +13,7 @@ config ARM64
|
||||
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_SET_MEMORY
|
||||
select ARCH_HAS_SG_CHAIN
|
||||
|
@ -83,4 +83,8 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
||||
extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void) { return true; }
|
||||
#endif
|
||||
|
||||
#endif /* __ASM_HUGETLB_H */
|
||||
|
@ -42,15 +42,13 @@ int pud_huge(pud_t pud)
|
||||
}
|
||||
|
||||
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, size_t *pgsize)
|
||||
pte_t *ptep, size_t *pgsize)
|
||||
{
|
||||
pgd_t *pgd = pgd_offset(mm, addr);
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
*pgsize = PAGE_SIZE;
|
||||
if (!pte_cont(pte))
|
||||
return 1;
|
||||
pud = pud_offset(pgd, addr);
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if ((pte_t *)pmd == ptep) {
|
||||
@ -65,15 +63,16 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
{
|
||||
size_t pgsize;
|
||||
int i;
|
||||
int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
|
||||
int ncontig;
|
||||
unsigned long pfn;
|
||||
pgprot_t hugeprot;
|
||||
|
||||
if (ncontig == 1) {
|
||||
if (!pte_cont(pte)) {
|
||||
set_pte_at(mm, addr, ptep, pte);
|
||||
return;
|
||||
}
|
||||
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
pfn = pte_pfn(pte);
|
||||
hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
|
||||
for (i = 0; i < ncontig; i++) {
|
||||
@ -132,7 +131,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
@ -184,21 +184,19 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
size_t pgsize;
|
||||
pte_t *cpte;
|
||||
bool is_dirty = false;
|
||||
|
||||
cpte = huge_pte_offset(mm, addr);
|
||||
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
/* save the 1st pte to return */
|
||||
pte = ptep_get_and_clear(mm, addr, cpte);
|
||||
pte = ptep_get_and_clear(mm, addr, ptep);
|
||||
for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
|
||||
/*
|
||||
* If HW_AFDBM is enabled, then the HW could
|
||||
* turn on the dirty bit for any of the page
|
||||
* in the set, so check them all.
|
||||
*/
|
||||
++cpte;
|
||||
if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
|
||||
++ptep;
|
||||
if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
|
||||
is_dirty = true;
|
||||
}
|
||||
if (is_dirty)
|
||||
@ -214,8 +212,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
pte_t pte, int dirty)
|
||||
{
|
||||
pte_t *cpte;
|
||||
|
||||
if (pte_cont(pte)) {
|
||||
int ncontig, i, changed = 0;
|
||||
size_t pgsize = 0;
|
||||
@ -225,12 +221,11 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
|
||||
__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
|
||||
pte_val(pte));
|
||||
|
||||
cpte = huge_pte_offset(vma->vm_mm, addr);
|
||||
pfn = pte_pfn(*cpte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
|
||||
*cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
|
||||
changed |= ptep_set_access_flags(vma, addr, cpte,
|
||||
pfn = pte_pfn(pte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
|
||||
&pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
|
||||
changed |= ptep_set_access_flags(vma, addr, ptep,
|
||||
pfn_pte(pfn,
|
||||
hugeprot),
|
||||
dirty);
|
||||
@ -247,13 +242,11 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
||||
{
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
pte_t *cpte;
|
||||
size_t pgsize = 0;
|
||||
|
||||
cpte = huge_pte_offset(mm, addr);
|
||||
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
|
||||
ptep_set_wrprotect(mm, addr, cpte);
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
|
||||
ptep_set_wrprotect(mm, addr, ptep);
|
||||
} else {
|
||||
ptep_set_wrprotect(mm, addr, ptep);
|
||||
}
|
||||
@ -264,14 +257,12 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
{
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
pte_t *cpte;
|
||||
size_t pgsize = 0;
|
||||
|
||||
cpte = huge_pte_offset(vma->vm_mm, addr);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
|
||||
*cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
|
||||
ptep_clear_flush(vma, addr, cpte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
|
||||
&pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
|
||||
ptep_clear_flush(vma, addr, ptep);
|
||||
} else {
|
||||
ptep_clear_flush(vma, addr, ptep);
|
||||
}
|
||||
|
@ -24,7 +24,6 @@
|
||||
/*
|
||||
* Page table definitions for Qualcomm Hexagon processor.
|
||||
*/
|
||||
#include <linux/swap.h>
|
||||
#include <asm/page.h>
|
||||
#define __ARCH_USE_5LEVEL_HACK
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <linux/compat.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kbuild.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
@ -24,6 +24,7 @@
|
||||
* be instantiated for it, differently from a native build.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/hexagon_vm.h>
|
||||
|
||||
|
@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
}
|
||||
|
||||
pte_t *
|
||||
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
|
||||
huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
{
|
||||
unsigned long taddr = htlbpage_to_page(addr);
|
||||
pgd_t *pgd;
|
||||
@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ
|
||||
if (REGION_NUMBER(addr) != RGN_HPAGE)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
ptep = huge_pte_offset(mm, addr);
|
||||
ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
|
||||
if (!ptep || pte_none(*ptep))
|
||||
return NULL;
|
||||
page = pte_page(*ptep);
|
||||
|
@ -646,20 +646,13 @@ mem_init (void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
struct zone *zone;
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
int ret;
|
||||
|
||||
pgdat = NODE_DATA(nid);
|
||||
|
||||
zone = pgdat->node_zones +
|
||||
zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
|
||||
ret = __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
|
||||
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
if (ret)
|
||||
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
|
||||
__func__, ret);
|
||||
|
@ -74,7 +74,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -36,7 +36,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -1,8 +1,10 @@
|
||||
|
||||
generic-y += barrier.h
|
||||
generic-y += clkdev.h
|
||||
generic-y += device.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
|
@ -1 +0,0 @@
|
||||
#include <asm-generic/device.h>
|
@ -1,23 +0,0 @@
|
||||
/* MN10300 Frame buffer stuff
|
||||
*
|
||||
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public Licence
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the Licence, or (at your option) any later version.
|
||||
*/
|
||||
#ifndef _ASM_FB_H
|
||||
#define _ASM_FB_H
|
||||
|
||||
#include <linux/fb.h>
|
||||
|
||||
#define fb_pgprotect(...) do {} while (0)
|
||||
|
||||
static inline int fb_is_primary_device(struct fb_info *info)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _ASM_FB_H */
|
@ -69,7 +69,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -50,4 +50,14 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void)
|
||||
{
|
||||
if (radix_enabled())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -17,6 +17,8 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlb.h>
|
||||
@ -55,7 +57,7 @@ static unsigned nr_gpages;
|
||||
|
||||
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
{
|
||||
/* Only called for hugetlbfs pages, hence can ignore THP */
|
||||
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
|
||||
@ -617,62 +619,39 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
|
||||
} while (addr = next, addr != end);
|
||||
}
|
||||
|
||||
/*
|
||||
* We are holding mmap_sem, so a parallel huge page collapse cannot run.
|
||||
* To prevent hugepage split, disable irq.
|
||||
*/
|
||||
struct page *
|
||||
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
||||
struct page *follow_huge_pd(struct vm_area_struct *vma,
|
||||
unsigned long address, hugepd_t hpd,
|
||||
int flags, int pdshift)
|
||||
{
|
||||
bool is_thp;
|
||||
pte_t *ptep, pte;
|
||||
unsigned shift;
|
||||
unsigned long mask, flags;
|
||||
struct page *page = ERR_PTR(-EINVAL);
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl;
|
||||
struct page *page = NULL;
|
||||
unsigned long mask;
|
||||
int shift = hugepd_shift(hpd);
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
local_irq_save(flags);
|
||||
ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
|
||||
if (!ptep)
|
||||
goto no_page;
|
||||
pte = READ_ONCE(*ptep);
|
||||
/*
|
||||
* Verify it is a huge page else bail.
|
||||
* Transparent hugepages are handled by generic code. We can skip them
|
||||
* here.
|
||||
*/
|
||||
if (!shift || is_thp)
|
||||
goto no_page;
|
||||
retry:
|
||||
ptl = &mm->page_table_lock;
|
||||
spin_lock(ptl);
|
||||
|
||||
if (!pte_present(pte)) {
|
||||
page = NULL;
|
||||
goto no_page;
|
||||
ptep = hugepte_offset(hpd, address, pdshift);
|
||||
if (pte_present(*ptep)) {
|
||||
mask = (1UL << shift) - 1;
|
||||
page = pte_page(*ptep);
|
||||
page += ((address & mask) >> PAGE_SHIFT);
|
||||
if (flags & FOLL_GET)
|
||||
get_page(page);
|
||||
} else {
|
||||
if (is_hugetlb_entry_migration(*ptep)) {
|
||||
spin_unlock(ptl);
|
||||
__migration_entry_wait(mm, ptep, ptl);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
mask = (1UL << shift) - 1;
|
||||
page = pte_page(pte);
|
||||
if (page)
|
||||
page += (address & mask) / PAGE_SIZE;
|
||||
|
||||
no_page:
|
||||
local_irq_restore(flags);
|
||||
spin_unlock(ptl);
|
||||
return page;
|
||||
}
|
||||
|
||||
struct page *
|
||||
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pmd_t *pmd, int write)
|
||||
{
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct page *
|
||||
follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
pud_t *pud, int write)
|
||||
{
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
|
||||
unsigned long sz)
|
||||
{
|
||||
@ -763,8 +742,11 @@ static int __init add_huge_page_size(unsigned long long size)
|
||||
* Hash: 16M and 16G
|
||||
*/
|
||||
if (radix_enabled()) {
|
||||
if (mmu_psize != MMU_PAGE_2M)
|
||||
return -EINVAL;
|
||||
if (mmu_psize != MMU_PAGE_2M) {
|
||||
if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
|
||||
(mmu_psize != MMU_PAGE_1G))
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
|
||||
return -EINVAL;
|
||||
|
@ -126,18 +126,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
struct pglist_data *pgdata;
|
||||
struct zone *zone;
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
int rc;
|
||||
|
||||
resize_hpt_for_hotplug(memblock_phys_mem_size());
|
||||
|
||||
pgdata = NODE_DATA(nid);
|
||||
|
||||
start = (unsigned long)__va(start);
|
||||
rc = create_section_mapping(start, start + size);
|
||||
if (rc) {
|
||||
@ -147,11 +143,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/* this should work for most non-highmem platforms */
|
||||
zone = pgdata->node_zones +
|
||||
zone_for_memory(nid, start, size, 0, for_device);
|
||||
|
||||
return __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
return __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
|
@ -344,12 +344,18 @@ config PPC_STD_MMU_64
|
||||
config PPC_RADIX_MMU
|
||||
bool "Radix MMU Support"
|
||||
depends on PPC_BOOK3S_64
|
||||
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
|
||||
default y
|
||||
help
|
||||
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
|
||||
is only implemented by IBM Power9 CPUs, if you don't have one of them
|
||||
you can probably disable this.
|
||||
|
||||
config ARCH_ENABLE_HUGEPAGE_MIGRATION
|
||||
def_bool y
|
||||
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
|
||||
|
||||
|
||||
config PPC_MMU_NOHASH
|
||||
def_bool y
|
||||
depends on !PPC_STD_MMU
|
||||
|
@ -68,7 +68,7 @@ config S390
|
||||
select ARCH_HAS_DEVMEM_IS_ALLOWED
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_SET_MEMORY
|
||||
select ARCH_HAS_SG_CHAIN
|
||||
|
@ -39,7 +39,7 @@ static inline int prepare_hugepage_range(struct file *file,
|
||||
#define arch_clear_hugepage_flags(page) do { } while (0)
|
||||
|
||||
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
pte_t *ptep, unsigned long sz)
|
||||
{
|
||||
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
|
||||
pte_val(*ptep) = _REGION3_ENTRY_EMPTY;
|
||||
@ -112,4 +112,7 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
|
||||
return pte_modify(pte, newprot);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void) { return true; }
|
||||
#endif
|
||||
#endif /* _ASM_S390_HUGETLB_H */
|
||||
|
@ -180,7 +180,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return (pte_t *) pmdp;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
p4d_t *p4dp;
|
||||
|
@ -166,43 +166,17 @@ unsigned long memory_block_size_bytes(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
|
||||
unsigned long start_pfn = PFN_DOWN(start);
|
||||
unsigned long size_pages = PFN_DOWN(size);
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
struct zone *zone;
|
||||
int rc, i;
|
||||
int rc;
|
||||
|
||||
rc = vmem_add_mapping(start, size);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for (i = 0; i < MAX_NR_ZONES; i++) {
|
||||
zone = pgdat->node_zones + i;
|
||||
if (zone_idx(zone) != ZONE_MOVABLE) {
|
||||
/* Add range within existing zone limits, if possible */
|
||||
zone_start_pfn = zone->zone_start_pfn;
|
||||
zone_end_pfn = zone->zone_start_pfn +
|
||||
zone->spanned_pages;
|
||||
} else {
|
||||
/* Add remaining range to ZONE_MOVABLE */
|
||||
zone_start_pfn = start_pfn;
|
||||
zone_end_pfn = start_pfn + size_pages;
|
||||
}
|
||||
if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
|
||||
continue;
|
||||
nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
|
||||
zone_end_pfn - start_pfn : size_pages;
|
||||
rc = __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
if (rc)
|
||||
break;
|
||||
start_pfn += nr_pages;
|
||||
size_pages -= nr_pages;
|
||||
if (!size_pages)
|
||||
break;
|
||||
}
|
||||
rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
|
||||
if (rc)
|
||||
vmem_remove_mapping(start, size);
|
||||
return rc;
|
||||
|
@ -42,7 +42,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -485,20 +485,14 @@ void free_initrd_mem(unsigned long start, unsigned long end)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
unsigned long start_pfn = PFN_DOWN(start);
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
int ret;
|
||||
|
||||
pgdat = NODE_DATA(nid);
|
||||
|
||||
/* We only have ZONE_NORMAL, so this is easy.. */
|
||||
ret = __add_pages(nid, pgdat->node_zones +
|
||||
zone_for_memory(nid, start, size, ZONE_NORMAL,
|
||||
for_device),
|
||||
start_pfn, nr_pages);
|
||||
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
if (unlikely(ret))
|
||||
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
|
||||
|
||||
|
@ -277,7 +277,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -102,7 +102,8 @@ static pte_t *get_pte(pte_t *base, int index, int level)
|
||||
return ptep;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
@ -503,6 +503,17 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap_prot);
|
||||
|
||||
#if !defined(CONFIG_PCI) || !defined(CONFIG_TILEGX)
|
||||
/* ioremap is conditionally declared in pci_gx.c */
|
||||
|
||||
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap);
|
||||
|
||||
#endif
|
||||
|
||||
/* Unmap an MMIO VA mapping. */
|
||||
void iounmap(volatile void __iomem *addr_in)
|
||||
{
|
||||
|
@ -22,7 +22,7 @@ config X86_64
|
||||
def_bool y
|
||||
depends on 64BIT
|
||||
# Options that are inherently 64-bit kernel only:
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
|
||||
select ARCH_SUPPORTS_INT128
|
||||
select ARCH_USE_CMPXCHG_LOCKREF
|
||||
select HAVE_ARCH_SOFT_DIRTY
|
||||
@ -72,6 +72,7 @@ config X86
|
||||
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
select ARCH_WANT_FRAME_POINTERS
|
||||
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
||||
select ARCH_WANTS_THP_SWAP if X86_64
|
||||
select BUILDTIME_EXTABLE_SORT
|
||||
select CLKEVT_I8253
|
||||
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
|
||||
|
@ -85,4 +85,8 @@ static inline void arch_clear_hugepage_flags(struct page *page)
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void) { return true; }
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_HUGETLB_H */
|
||||
|
@ -33,7 +33,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
||||
if (!vma || !is_vm_hugetlb_page(vma))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
pte = huge_pte_offset(mm, address);
|
||||
pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
|
||||
|
||||
/* hugetlb should be locked, and hence, prefaulted */
|
||||
WARN_ON(!pte || pte_none(*pte));
|
||||
|
@ -823,15 +823,12 @@ void __init mem_init(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
struct pglist_data *pgdata = NODE_DATA(nid);
|
||||
struct zone *zone = pgdata->node_zones +
|
||||
zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
|
||||
return __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
return __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
|
@ -772,22 +772,15 @@ static void update_end_of_memory_vars(u64 start, u64 size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory is added always to NORMAL zone. This means you will never get
|
||||
* additional DMA/DMA32 memory.
|
||||
*/
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
struct zone *zone = pgdat->node_zones +
|
||||
zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
int ret;
|
||||
|
||||
init_memory_mapping(start, start + size);
|
||||
|
||||
ret = __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
WARN_ON_ONCE(ret);
|
||||
|
||||
/* update max_pfn, max_low_pfn and high_memory */
|
||||
|
@ -128,6 +128,9 @@ static ssize_t show_mem_removable(struct device *dev,
|
||||
int ret = 1;
|
||||
struct memory_block *mem = to_memory_block(dev);
|
||||
|
||||
if (mem->state != MEM_ONLINE)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < sections_per_block; i++) {
|
||||
if (!present_section_nr(mem->start_section_nr + i))
|
||||
continue;
|
||||
@ -135,6 +138,7 @@ static ssize_t show_mem_removable(struct device *dev,
|
||||
ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
|
||||
}
|
||||
|
||||
out:
|
||||
return sprintf(buf, "%d\n", ret);
|
||||
}
|
||||
|
||||
@ -388,39 +392,43 @@ static ssize_t show_valid_zones(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct memory_block *mem = to_memory_block(dev);
|
||||
unsigned long start_pfn, end_pfn;
|
||||
unsigned long valid_start, valid_end, valid_pages;
|
||||
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
|
||||
struct zone *zone;
|
||||
int zone_shift = 0;
|
||||
unsigned long valid_start_pfn, valid_end_pfn;
|
||||
bool append = false;
|
||||
int nid;
|
||||
|
||||
start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||
end_pfn = start_pfn + nr_pages;
|
||||
|
||||
/* The block contains more than one zone can not be offlined. */
|
||||
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
|
||||
/*
|
||||
* The block contains more than one zone can not be offlined.
|
||||
* This can happen e.g. for ZONE_DMA and ZONE_DMA32
|
||||
*/
|
||||
if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn))
|
||||
return sprintf(buf, "none\n");
|
||||
|
||||
zone = page_zone(pfn_to_page(valid_start));
|
||||
valid_pages = valid_end - valid_start;
|
||||
start_pfn = valid_start_pfn;
|
||||
nr_pages = valid_end_pfn - start_pfn;
|
||||
|
||||
/* MMOP_ONLINE_KEEP */
|
||||
sprintf(buf, "%s", zone->name);
|
||||
|
||||
/* MMOP_ONLINE_KERNEL */
|
||||
zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift);
|
||||
if (zone_shift) {
|
||||
strcat(buf, " ");
|
||||
strcat(buf, (zone + zone_shift)->name);
|
||||
/*
|
||||
* Check the existing zone. Make sure that we do that only on the
|
||||
* online nodes otherwise the page_zone is not reliable
|
||||
*/
|
||||
if (mem->state == MEM_ONLINE) {
|
||||
strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* MMOP_ONLINE_MOVABLE */
|
||||
zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift);
|
||||
if (zone_shift) {
|
||||
strcat(buf, " ");
|
||||
strcat(buf, (zone + zone_shift)->name);
|
||||
nid = pfn_to_nid(start_pfn);
|
||||
if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
|
||||
strcat(buf, default_zone_for_pfn(nid, start_pfn, nr_pages)->name);
|
||||
append = true;
|
||||
}
|
||||
|
||||
if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE)) {
|
||||
if (append)
|
||||
strcat(buf, " ");
|
||||
strcat(buf, NODE_DATA(nid)->node_zones[ZONE_MOVABLE].name);
|
||||
}
|
||||
out:
|
||||
strcat(buf, "\n");
|
||||
|
||||
return strlen(buf);
|
||||
@ -685,14 +693,6 @@ static int add_memory_block(int base_section_nr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_zone_device_section(struct mem_section *ms)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
page = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms));
|
||||
return is_zone_device_page(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* need an interface for the VM to add new memory regions,
|
||||
* but without onlining it.
|
||||
@ -702,9 +702,6 @@ int register_new_memory(int nid, struct mem_section *section)
|
||||
int ret = 0;
|
||||
struct memory_block *mem;
|
||||
|
||||
if (is_zone_device_section(section))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&mem_sysfs_mutex);
|
||||
|
||||
mem = find_memory_block(section);
|
||||
@ -741,11 +738,16 @@ static int remove_memory_section(unsigned long node_id,
|
||||
{
|
||||
struct memory_block *mem;
|
||||
|
||||
if (is_zone_device_section(section))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&mem_sysfs_mutex);
|
||||
|
||||
/*
|
||||
* Some users of the memory hotplug do not want/need memblock to
|
||||
* track all sections. Skip over those.
|
||||
*/
|
||||
mem = find_memory_block(section);
|
||||
if (!mem)
|
||||
goto out_unlock;
|
||||
|
||||
unregister_mem_sect_under_nodes(mem, __section_nr(section));
|
||||
|
||||
mem->section_count--;
|
||||
@ -754,6 +756,7 @@ static int remove_memory_section(unsigned long node_id,
|
||||
else
|
||||
put_device(&mem->dev);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&mem_sysfs_mutex);
|
||||
return 0;
|
||||
}
|
||||
@ -820,6 +823,10 @@ int __init memory_dev_init(void)
|
||||
*/
|
||||
mutex_lock(&mem_sysfs_mutex);
|
||||
for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
|
||||
/* Don't iterate over sections we know are !present: */
|
||||
if (i > __highest_present_section_nr)
|
||||
break;
|
||||
|
||||
err = add_memory_block(i);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
|
@ -129,11 +129,11 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
|
||||
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) +
|
||||
sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
|
||||
node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
|
||||
HPAGE_PMD_NR),
|
||||
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
|
||||
@ -141,7 +141,7 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
|
||||
HPAGE_PMD_NR));
|
||||
#else
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
|
||||
nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
|
||||
#endif
|
||||
n += hugetlb_report_node_meminfo(nid, buf + n);
|
||||
return n;
|
||||
@ -368,21 +368,14 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
|
||||
#define page_initialized(page) (page->lru.next)
|
||||
|
||||
static int __ref get_nid_for_pfn(unsigned long pfn)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
if (!pfn_valid_within(pfn))
|
||||
return -1;
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
if (system_state < SYSTEM_RUNNING)
|
||||
return early_pfn_to_nid(pfn);
|
||||
#endif
|
||||
page = pfn_to_page(pfn);
|
||||
if (!page_initialized(page))
|
||||
return -1;
|
||||
return pfn_to_nid(pfn);
|
||||
}
|
||||
|
||||
@ -468,10 +461,9 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int link_mem_sections(int nid)
|
||||
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
|
||||
{
|
||||
unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
|
||||
unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
|
||||
unsigned long end_pfn = start_pfn + nr_pages;
|
||||
unsigned long pfn;
|
||||
struct memory_block *mem_blk = NULL;
|
||||
int err = 0;
|
||||
@ -559,10 +551,7 @@ static int node_memory_callback(struct notifier_block *self,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif /* CONFIG_HUGETLBFS */
|
||||
#else /* !CONFIG_MEMORY_HOTPLUG_SPARSE */
|
||||
|
||||
static int link_mem_sections(int nid) { return 0; }
|
||||
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
||||
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
||||
|
||||
#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
|
||||
!defined(CONFIG_HUGETLBFS)
|
||||
@ -576,39 +565,32 @@ static void init_node_hugetlb_work(int nid) { }
|
||||
|
||||
#endif
|
||||
|
||||
int register_one_node(int nid)
|
||||
int __register_one_node(int nid)
|
||||
{
|
||||
int error = 0;
|
||||
int p_node = parent_node(nid);
|
||||
struct node *parent = NULL;
|
||||
int error;
|
||||
int cpu;
|
||||
|
||||
if (node_online(nid)) {
|
||||
int p_node = parent_node(nid);
|
||||
struct node *parent = NULL;
|
||||
if (p_node != nid)
|
||||
parent = node_devices[p_node];
|
||||
|
||||
if (p_node != nid)
|
||||
parent = node_devices[p_node];
|
||||
node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
|
||||
if (!node_devices[nid])
|
||||
return -ENOMEM;
|
||||
|
||||
node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
|
||||
if (!node_devices[nid])
|
||||
return -ENOMEM;
|
||||
error = register_node(node_devices[nid], nid, parent);
|
||||
|
||||
error = register_node(node_devices[nid], nid, parent);
|
||||
|
||||
/* link cpu under this node */
|
||||
for_each_present_cpu(cpu) {
|
||||
if (cpu_to_node(cpu) == nid)
|
||||
register_cpu_under_node(cpu, nid);
|
||||
}
|
||||
|
||||
/* link memory sections under this node */
|
||||
error = link_mem_sections(nid);
|
||||
|
||||
/* initialize work queue for memory hot plug */
|
||||
init_node_hugetlb_work(nid);
|
||||
/* link cpu under this node */
|
||||
for_each_present_cpu(cpu) {
|
||||
if (cpu_to_node(cpu) == nid)
|
||||
register_cpu_under_node(cpu, nid);
|
||||
}
|
||||
|
||||
return error;
|
||||
/* initialize work queue for memory hot plug */
|
||||
init_node_hugetlb_work(nid);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
void unregister_one_node(int nid)
|
||||
@ -657,9 +639,7 @@ static struct node_attr node_state_attr[] = {
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
|
||||
#endif
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
|
||||
#endif
|
||||
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
|
||||
};
|
||||
|
||||
@ -670,9 +650,7 @@ static struct attribute *node_state_attrs[] = {
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
&node_state_attr[N_HIGH_MEMORY].attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
&node_state_attr[N_MEMORY].attr.attr,
|
||||
#endif
|
||||
&node_state_attr[N_CPU].attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
@ -469,6 +469,7 @@ static bool zram_same_page_write(struct zram *zram, u32 index,
|
||||
zram_slot_unlock(zram, index);
|
||||
|
||||
atomic64_inc(&zram->stats.same_pages);
|
||||
atomic64_inc(&zram->stats.pages_stored);
|
||||
return true;
|
||||
}
|
||||
kunmap_atomic(mem);
|
||||
@ -524,6 +525,7 @@ static void zram_free_page(struct zram *zram, size_t index)
|
||||
zram_clear_flag(zram, index, ZRAM_SAME);
|
||||
zram_set_element(zram, index, 0);
|
||||
atomic64_dec(&zram->stats.same_pages);
|
||||
atomic64_dec(&zram->stats.pages_stored);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -94,10 +94,8 @@ static int add_virq_to_pirq(unsigned int irq, unsigned int virq)
|
||||
}
|
||||
|
||||
entry = kzalloc(sizeof(struct intc_virq_list), GFP_ATOMIC);
|
||||
if (!entry) {
|
||||
pr_err("can't allocate VIRQ mapping for %d\n", virq);
|
||||
if (!entry)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
entry->irq = virq;
|
||||
|
||||
|
fs/dax.c
@ -1213,7 +1213,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
case IOMAP_MAPPED:
|
||||
if (iomap.flags & IOMAP_F_NEW) {
|
||||
count_vm_event(PGMAJFAULT);
|
||||
mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
major = VM_FAULT_MAJOR;
|
||||
}
|
||||
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
|
||||
|
fs/dcache.c
@ -3546,8 +3546,6 @@ __setup("dhash_entries=", set_dhash_entries);
|
||||
|
||||
static void __init dcache_init_early(void)
|
||||
{
|
||||
unsigned int loop;
|
||||
|
||||
/* If hashes are distributed across NUMA nodes, defer
|
||||
* hash allocation until vmalloc space is available.
|
||||
*/
|
||||
@ -3559,24 +3557,19 @@ static void __init dcache_init_early(void)
|
||||
sizeof(struct hlist_bl_head),
|
||||
dhash_entries,
|
||||
13,
|
||||
HASH_EARLY,
|
||||
HASH_EARLY | HASH_ZERO,
|
||||
&d_hash_shift,
|
||||
&d_hash_mask,
|
||||
0,
|
||||
0);
|
||||
|
||||
for (loop = 0; loop < (1U << d_hash_shift); loop++)
|
||||
INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
|
||||
}
|
||||
|
||||
static void __init dcache_init(void)
|
||||
{
|
||||
unsigned int loop;
|
||||
|
||||
/*
|
||||
/*
|
||||
* A constructor could be added for stable state like the lists,
|
||||
* but it is probably not worth it because of the cache nature
|
||||
* of the dcache.
|
||||
* of the dcache.
|
||||
*/
|
||||
dentry_cache = KMEM_CACHE(dentry,
|
||||
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
|
||||
@ -3590,14 +3583,11 @@ static void __init dcache_init(void)
|
||||
sizeof(struct hlist_bl_head),
|
||||
dhash_entries,
|
||||
13,
|
||||
0,
|
||||
HASH_ZERO,
|
||||
&d_hash_shift,
|
||||
&d_hash_mask,
|
||||
0,
|
||||
0);
|
||||
|
||||
for (loop = 0; loop < (1U << d_hash_shift); loop++)
|
||||
INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
|
||||
}
|
||||
|
||||
/* SLAB cache for __getname() consumers */
|
||||
|
fs/file.c
@ -30,21 +30,6 @@ unsigned int sysctl_nr_open_min = BITS_PER_LONG;
|
||||
unsigned int sysctl_nr_open_max =
|
||||
__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
|
||||
|
||||
static void *alloc_fdmem(size_t size)
|
||||
{
|
||||
/*
|
||||
* Very large allocations can stress page reclaim, so fall back to
|
||||
* vmalloc() if the allocation size will be considered "large" by the VM.
|
||||
*/
|
||||
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
|
||||
void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
|
||||
__GFP_NOWARN | __GFP_NORETRY);
|
||||
if (data != NULL)
|
||||
return data;
|
||||
}
|
||||
return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
static void __free_fdtable(struct fdtable *fdt)
|
||||
{
|
||||
kvfree(fdt->fd);
|
||||
@ -131,13 +116,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
|
||||
if (!fdt)
|
||||
goto out;
|
||||
fdt->max_fds = nr;
|
||||
data = alloc_fdmem(nr * sizeof(struct file *));
|
||||
data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
|
||||
if (!data)
|
||||
goto out_fdt;
|
||||
fdt->fd = data;
|
||||
|
||||
data = alloc_fdmem(max_t(size_t,
|
||||
2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES));
|
||||
data = kvmalloc(max_t(size_t,
|
||||
2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!data)
|
||||
goto out_arr;
|
||||
fdt->open_fds = data;
|
||||
|
fs/inode.c
@ -1915,8 +1915,6 @@ __setup("ihash_entries=", set_ihash_entries);
|
||||
*/
|
||||
void __init inode_init_early(void)
|
||||
{
|
||||
unsigned int loop;
|
||||
|
||||
/* If hashes are distributed across NUMA nodes, defer
|
||||
* hash allocation until vmalloc space is available.
|
||||
*/
|
||||
@ -1928,20 +1926,15 @@ void __init inode_init_early(void)
|
||||
sizeof(struct hlist_head),
|
||||
ihash_entries,
|
||||
14,
|
||||
HASH_EARLY,
|
||||
HASH_EARLY | HASH_ZERO,
|
||||
&i_hash_shift,
|
||||
&i_hash_mask,
|
||||
0,
|
||||
0);
|
||||
|
||||
for (loop = 0; loop < (1U << i_hash_shift); loop++)
|
||||
INIT_HLIST_HEAD(&inode_hashtable[loop]);
|
||||
}
|
||||
|
||||
void __init inode_init(void)
|
||||
{
|
||||
unsigned int loop;
|
||||
|
||||
/* inode slab cache */
|
||||
inode_cachep = kmem_cache_create("inode_cache",
|
||||
sizeof(struct inode),
|
||||
@ -1959,14 +1952,11 @@ void __init inode_init(void)
|
||||
sizeof(struct hlist_head),
|
||||
ihash_entries,
|
||||
14,
|
||||
0,
|
||||
HASH_ZERO,
|
||||
&i_hash_shift,
|
||||
&i_hash_mask,
|
||||
0,
|
||||
0);
|
||||
|
||||
for (loop = 0; loop < (1U << i_hash_shift); loop++)
|
||||
INIT_HLIST_HEAD(&inode_hashtable[loop]);
|
||||
}
|
||||
|
||||
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
|
||||
|
@ -3239,7 +3239,6 @@ static void __init init_mount_tree(void)
|
||||
|
||||
void __init mnt_init(void)
|
||||
{
|
||||
unsigned u;
|
||||
int err;
|
||||
|
||||
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
|
||||
@ -3248,22 +3247,17 @@ void __init mnt_init(void)
|
||||
mount_hashtable = alloc_large_system_hash("Mount-cache",
|
||||
sizeof(struct hlist_head),
|
||||
mhash_entries, 19,
|
||||
0,
|
||||
HASH_ZERO,
|
||||
&m_hash_shift, &m_hash_mask, 0, 0);
|
||||
mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
|
||||
sizeof(struct hlist_head),
|
||||
mphash_entries, 19,
|
||||
0,
|
||||
HASH_ZERO,
|
||||
&mp_hash_shift, &mp_hash_mask, 0, 0);
|
||||
|
||||
if (!mount_hashtable || !mountpoint_hashtable)
|
||||
panic("Failed to allocate mount hash table\n");
|
||||
|
||||
for (u = 0; u <= m_hash_mask; u++)
|
||||
INIT_HLIST_HEAD(&mount_hashtable[u]);
|
||||
for (u = 0; u <= mp_hash_mask; u++)
|
||||
INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
|
||||
|
||||
kernfs_init();
|
||||
|
||||
err = sysfs_init();
|
||||
|
@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_fault *vmf)
|
||||
* -- nyc
|
||||
*/
|
||||
count_vm_event(PGMAJFAULT);
|
||||
mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
return VM_FAULT_MAJOR;
|
||||
}
|
||||
|
||||
|
@ -426,6 +426,7 @@ static int sc_fop_release(struct inode *inode, struct file *file)
|
||||
struct o2net_sock_container *dummy_sc = sd->dbg_sock;
|
||||
|
||||
o2net_debug_del_sc(dummy_sc);
|
||||
kfree(dummy_sc);
|
||||
return seq_release_private(inode, file);
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
|
||||
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
|
||||
int sysfile_type)
|
||||
{
|
||||
int rc = 0;
|
||||
int rc = -ESTALE;
|
||||
struct inode *inode = NULL;
|
||||
struct super_block *sb = osb->sb;
|
||||
struct ocfs2_find_inode_args args;
|
||||
|
@ -25,6 +25,8 @@
|
||||
#ifndef _OCFS2_FS_H
|
||||
#define _OCFS2_FS_H
|
||||
|
||||
#include <linux/magic.h>
|
||||
|
||||
/* Version */
|
||||
#define OCFS2_MAJOR_REV_LEVEL 0
|
||||
#define OCFS2_MINOR_REV_LEVEL 90
|
||||
@ -56,9 +58,6 @@
|
||||
#define OCFS2_MIN_BLOCKSIZE 512
|
||||
#define OCFS2_MAX_BLOCKSIZE OCFS2_MIN_CLUSTERSIZE
|
||||
|
||||
/* Filesystem magic number */
|
||||
#define OCFS2_SUPER_MAGIC 0x7461636f
|
||||
|
||||
/* Object signatures */
|
||||
#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
|
||||
#define OCFS2_INODE_SIGNATURE "INODE01"
|
||||
|
@ -631,7 +631,7 @@ static struct attribute *ocfs2_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group ocfs2_attr_group = {
|
||||
static const struct attribute_group ocfs2_attr_group = {
|
||||
.attrs = ocfs2_attrs,
|
||||
};
|
||||
|
||||
|
@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
|
||||
* hugepmd ranges.
|
||||
*/
|
||||
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long flags,
|
||||
unsigned long reason)
|
||||
@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
|
||||
|
||||
VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
|
||||
|
||||
pte = huge_pte_offset(mm, address);
|
||||
pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
|
||||
if (!pte)
|
||||
goto out;
|
||||
|
||||
@ -243,6 +244,7 @@ out:
|
||||
}
|
||||
#else
|
||||
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long flags,
|
||||
unsigned long reason)
|
||||
@ -448,7 +450,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
||||
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
|
||||
reason);
|
||||
else
|
||||
must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
|
||||
must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
|
||||
vmf->address,
|
||||
vmf->flags, reason);
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
@ -1114,11 +1117,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
|
||||
static void __wake_userfault(struct userfaultfd_ctx *ctx,
|
||||
struct userfaultfd_wake_range *range)
|
||||
{
|
||||
unsigned long start, end;
|
||||
|
||||
start = range->start;
|
||||
end = range->start + range->len;
|
||||
|
||||
spin_lock(&ctx->fault_pending_wqh.lock);
|
||||
/* wake all in the range and autoremove */
|
||||
if (waitqueue_active(&ctx->fault_pending_wqh))
|
||||
|
@ -31,10 +31,12 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
|
||||
return pte_modify(pte, newprot);
|
||||
}
|
||||
|
||||
#ifndef huge_pte_clear
|
||||
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
pte_t *ptep, unsigned long sz)
|
||||
{
|
||||
pte_clear(mm, addr, ptep);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_GENERIC_HUGETLB_H */
|
||||
|
@ -358,6 +358,7 @@ extern void *alloc_large_system_hash(const char *tablename,
|
||||
#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
|
||||
#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min
|
||||
* shift passed via *_hash_shift */
|
||||
#define HASH_ZERO 0x00000004 /* Zero allocated hash table */
|
||||
|
||||
/* Only NUMA needs hash distribution. 64bit NUMA architectures have
|
||||
* sufficient vmalloc space.
|
||||
|
@ -15,11 +15,3 @@
|
||||
* with any version that can compile the kernel
|
||||
*/
|
||||
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
|
||||
|
||||
/*
|
||||
* GCC does not warn about unused static inline functions for
|
||||
* -Wunused-function. This turns out to avoid the need for complex #ifdef
|
||||
* directives. Suppress the warning in clang as well.
|
||||
*/
|
||||
#undef inline
|
||||
#define inline inline __attribute__((unused)) notrace
|
||||
|
@@ -66,18 +66,22 @@

/*
 * Force always-inline if the user requests it so via the .config,
 * or if gcc is too old:
 * or if gcc is too old.
 * GCC does not warn about unused static inline functions for
 * -Wunused-function. This turns out to avoid the need for complex #ifdef
 * directives. Suppress the warning in clang as well by using "unused"
 * function attribute, which is redundant but not harmful for gcc.
 */
#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
#define inline inline __attribute__((always_inline)) notrace
#define __inline__ __inline__ __attribute__((always_inline)) notrace
#define __inline __inline __attribute__((always_inline)) notrace
#define inline inline __attribute__((always_inline,unused)) notrace
#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace
#define __inline __inline __attribute__((always_inline,unused)) notrace
#else
/* A lot of inline functions can cause havoc with function tracing */
#define inline inline notrace
#define __inline__ __inline__ notrace
#define __inline __inline notrace
#define inline inline __attribute__((unused)) notrace
#define __inline__ __inline__ __attribute__((unused)) notrace
#define __inline __inline __attribute__((unused)) notrace
#endif

#define __always_inline inline __attribute__((always_inline))
@@ -16,13 +16,10 @@
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/cryptohash.h>
#include <linux/set_memory.h>

#include <net/sch_generic.h>

#ifdef CONFIG_ARCH_HAS_SET_MEMORY
#include <asm/set_memory.h>
#endif

#include <uapi/linux/filter.h>
#include <uapi/linux/bpf.h>
@@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { }
#endif

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
		       struct zonelist *zonelist, nodemask_t *nodemask);
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
		       nodemask_t *nodemask);

static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
	      struct zonelist *zonelist)
__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
	return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
}

/*
@@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
	VM_WARN_ON(!node_online(nid));

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
	return __alloc_pages(gfp_mask, order, nid);
}

/*
@ -113,6 +113,7 @@ extern unsigned long thp_get_unmapped_area(struct file *filp,
|
||||
extern void prep_transhuge_page(struct page *page);
|
||||
extern void free_transhuge_page(struct page *page);
|
||||
|
||||
bool can_split_huge_page(struct page *page, int *pextra_pins);
|
||||
int split_huge_page_to_list(struct page *page, struct list_head *list);
|
||||
static inline int split_huge_page(struct page *page)
|
||||
{
|
||||
@ -231,6 +232,12 @@ static inline void prep_transhuge_page(struct page *page) {}
|
||||
|
||||
#define thp_get_unmapped_area NULL
|
||||
|
||||
static inline bool
|
||||
can_split_huge_page(struct page *page, int *pextra_pins)
|
||||
{
|
||||
BUILD_BUG();
|
||||
return false;
|
||||
}
|
||||
static inline int
|
||||
split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
{
|
||||
|
@ -14,6 +14,30 @@ struct ctl_table;
|
||||
struct user_struct;
|
||||
struct mmu_gather;
|
||||
|
||||
#ifndef is_hugepd
|
||||
/*
|
||||
* Some architectures requires a hugepage directory format that is
|
||||
* required to support multiple hugepage sizes. For example
|
||||
* a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
|
||||
* introduced the same on powerpc. This allows for a more flexible hugepage
|
||||
* pagetable layout.
|
||||
*/
|
||||
typedef struct { unsigned long pd; } hugepd_t;
|
||||
#define is_hugepd(hugepd) (0)
|
||||
#define __hugepd(x) ((hugepd_t) { (x) })
|
||||
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned pdshift, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned pdshift, unsigned long end,
|
||||
int write, struct page **pages, int *nr);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
|
||||
#include <linux/mempolicy.h>
|
||||
@ -113,19 +137,27 @@ extern struct list_head huge_boot_pages;
|
||||
|
||||
pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz);
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz);
|
||||
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
|
||||
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
|
||||
int write);
|
||||
struct page *follow_huge_pd(struct vm_area_struct *vma,
|
||||
unsigned long address, hugepd_t hpd,
|
||||
int flags, int pdshift);
|
||||
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pmd_t *pmd, int flags);
|
||||
struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
pud_t *pud, int flags);
|
||||
struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
|
||||
pgd_t *pgd, int flags);
|
||||
|
||||
int pmd_huge(pmd_t pmd);
|
||||
int pud_huge(pud_t pud);
|
||||
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned long end, pgprot_t newprot);
|
||||
|
||||
bool is_hugetlb_entry_migration(pte_t pte);
|
||||
#else /* !CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
|
||||
@ -147,8 +179,10 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
|
||||
static inline void hugetlb_show_meminfo(void)
|
||||
{
|
||||
}
|
||||
#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
|
||||
#define follow_huge_pmd(mm, addr, pmd, flags) NULL
|
||||
#define follow_huge_pud(mm, addr, pud, flags) NULL
|
||||
#define follow_huge_pgd(mm, addr, pgd, flags) NULL
|
||||
#define prepare_hugepage_range(file, addr, len) (-EINVAL)
|
||||
#define pmd_huge(x) 0
|
||||
#define pud_huge(x) 0
|
||||
@ -157,7 +191,7 @@ static inline void hugetlb_show_meminfo(void)
|
||||
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
|
||||
#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
|
||||
src_addr, pagep) ({ BUG(); 0; })
|
||||
#define huge_pte_offset(mm, address) 0
|
||||
#define huge_pte_offset(mm, address, sz) 0
|
||||
static inline int dequeue_hwpoisoned_huge_page(struct page *page)
|
||||
{
|
||||
return 0;
|
||||
@ -217,29 +251,6 @@ static inline int pud_write(pud_t pud)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef is_hugepd
|
||||
/*
|
||||
* Some architectures requires a hugepage directory format that is
|
||||
* required to support multiple hugepage sizes. For example
|
||||
* a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
|
||||
* introduced the same on powerpc. This allows for a more flexible hugepage
|
||||
* pagetable layout.
|
||||
*/
|
||||
typedef struct { unsigned long pd; } hugepd_t;
|
||||
#define is_hugepd(hugepd) (0)
|
||||
#define __hugepd(x) ((hugepd_t) { (x) })
|
||||
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned pdshift, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
|
||||
unsigned pdshift, unsigned long end,
|
||||
int write, struct page **pages, int *nr);
|
||||
#endif
|
||||
|
||||
#define HUGETLB_ANON_FILE "anon_hugepage"
|
||||
|
||||
enum {
|
||||
@ -466,7 +477,11 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn,
|
||||
static inline bool hugepage_migration_supported(struct hstate *h)
|
||||
{
|
||||
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
|
||||
return huge_page_shift(h) == PMD_SHIFT;
|
||||
if ((huge_page_shift(h) == PMD_SHIFT) ||
|
||||
(huge_page_shift(h) == PGDIR_SHIFT))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
@ -501,6 +516,14 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
|
||||
{
|
||||
atomic_long_sub(l, &mm->hugetlb_usage);
|
||||
}
|
||||
|
||||
#ifndef set_huge_swap_pte_at
|
||||
static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, unsigned long sz)
|
||||
{
|
||||
set_huge_pte_at(mm, addr, ptep, pte);
|
||||
}
|
||||
#endif
|
||||
#else /* CONFIG_HUGETLB_PAGE */
|
||||
struct hstate {};
|
||||
#define alloc_huge_page(v, a, r) NULL
|
||||
@ -518,6 +541,11 @@ struct hstate {};
|
||||
#define vma_mmu_pagesize(v) PAGE_SIZE
|
||||
#define huge_page_order(h) 0
|
||||
#define huge_page_shift(h) PAGE_SHIFT
|
||||
static inline bool hstate_is_gigantic(struct hstate *h)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned int pages_per_huge_page(struct hstate *h)
|
||||
{
|
||||
return 1;
|
||||
@ -545,6 +573,11 @@ static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
|
||||
static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, unsigned long sz)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static inline spinlock_t *huge_pte_lock(struct hstate *h,
|
||||
|
@ -22,6 +22,7 @@
|
||||
#define __KMEMLEAK_H
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_KMEMLEAK
|
||||
|
||||
@ -30,6 +31,8 @@ extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
|
||||
gfp_t gfp) __ref;
|
||||
extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
|
||||
gfp_t gfp) __ref;
|
||||
extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
|
||||
gfp_t gfp) __ref;
|
||||
extern void kmemleak_free(const void *ptr) __ref;
|
||||
extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
|
||||
extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
|
||||
@ -81,6 +84,10 @@ static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
|
||||
gfp_t gfp)
|
||||
{
|
||||
}
|
||||
static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
|
||||
gfp_t gfp)
|
||||
{
|
||||
}
|
||||
static inline void kmemleak_free(const void *ptr)
|
||||
{
|
||||
}
|
||||
|
@ -57,10 +57,6 @@ struct memblock {
|
||||
|
||||
extern struct memblock memblock;
|
||||
extern int memblock_debug;
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
/* If movable_node boot option specified */
|
||||
extern bool movable_node_enabled;
|
||||
#endif /* CONFIG_MOVABLE_NODE */
|
||||
|
||||
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
|
||||
#define __init_memblock __meminit
|
||||
@ -169,27 +165,11 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
|
||||
i != (u64)ULLONG_MAX; \
|
||||
__next_reserved_mem_region(&i, p_start, p_end))
|
||||
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
|
||||
{
|
||||
return m->flags & MEMBLOCK_HOTPLUG;
|
||||
}
|
||||
|
||||
static inline bool __init_memblock movable_node_is_enabled(void)
|
||||
{
|
||||
return movable_node_enabled;
|
||||
}
|
||||
#else
|
||||
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool memblock_is_mirror(struct memblock_region *m)
|
||||
{
|
||||
return m->flags & MEMBLOCK_MIRROR;
|
||||
@ -296,7 +276,6 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
|
||||
|
||||
phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
|
||||
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
/*
|
||||
* Set the allocation direction to bottom-up or top-down.
|
||||
*/
|
||||
@ -314,10 +293,6 @@ static inline bool memblock_bottom_up(void)
|
||||
{
|
||||
return memblock.bottom_up;
|
||||
}
|
||||
#else
|
||||
static inline void __init memblock_set_bottom_up(bool enable) {}
|
||||
static inline bool memblock_bottom_up(void) { return false; }
|
||||
#endif
|
||||
|
||||
/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
|
||||
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
|
||||
|
@ -26,7 +26,8 @@
|
||||
#include <linux/page_counter.h>
|
||||
#include <linux/vmpressure.h>
|
||||
#include <linux/eventfd.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/page-flags.h>
|
||||
|
||||
@ -44,8 +45,6 @@ enum memcg_stat_item {
|
||||
MEMCG_SOCK,
|
||||
/* XXX: why are these zone and not node counters? */
|
||||
MEMCG_KERNEL_STACK_KB,
|
||||
MEMCG_SLAB_RECLAIMABLE,
|
||||
MEMCG_SLAB_UNRECLAIMABLE,
|
||||
MEMCG_NR_STAT,
|
||||
};
|
||||
|
||||
@ -100,11 +99,16 @@ struct mem_cgroup_reclaim_iter {
|
||||
unsigned int generation;
|
||||
};
|
||||
|
||||
struct lruvec_stat {
|
||||
long count[NR_VM_NODE_STAT_ITEMS];
|
||||
};
|
||||
|
||||
/*
|
||||
* per-zone information in memory controller.
|
||||
*/
|
||||
struct mem_cgroup_per_node {
|
||||
struct lruvec lruvec;
|
||||
struct lruvec_stat __percpu *lruvec_stat;
|
||||
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
|
||||
|
||||
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
|
||||
@ -357,6 +361,17 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
|
||||
}
|
||||
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
|
||||
|
||||
static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
|
||||
{
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return NULL;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
return mz->memcg;
|
||||
}
|
||||
|
||||
/**
|
||||
* parent_mem_cgroup - find the accounting parent of a memcg
|
||||
* @memcg: memcg whose parent to find
|
||||
@ -487,6 +502,13 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
if (!mem_cgroup_disabled())
|
||||
__this_cpu_add(memcg->stat->count[idx], val);
|
||||
}
|
||||
|
||||
static inline void mod_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
@ -494,18 +516,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
|
||||
this_cpu_add(memcg->stat->count[idx], val);
|
||||
}
|
||||
|
||||
static inline void inc_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_state(memcg, idx, 1);
|
||||
}
|
||||
|
||||
static inline void dec_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_state(memcg, idx, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* mod_memcg_page_state - update page state statistics
|
||||
* @page: the page
|
||||
@ -523,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg,
|
||||
*
|
||||
* Kernel pages are an exception to this, since they'll never move.
|
||||
*/
|
||||
static inline void __mod_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
if (page->mem_cgroup)
|
||||
__mod_memcg_state(page->mem_cgroup, idx, val);
|
||||
}
|
||||
|
||||
static inline void mod_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
@ -530,24 +547,99 @@ static inline void mod_memcg_page_state(struct page *page,
|
||||
mod_memcg_state(page->mem_cgroup, idx, val);
|
||||
}
|
||||
|
||||
static inline void inc_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
mod_memcg_page_state(page, idx, 1);
|
||||
struct mem_cgroup_per_node *pn;
|
||||
long val = 0;
|
||||
int cpu;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return node_page_state(lruvec_pgdat(lruvec), idx);
|
||||
|
||||
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
for_each_possible_cpu(cpu)
|
||||
val += per_cpu(pn->lruvec_stat->count[idx], cpu);
|
||||
|
||||
if (val < 0)
|
||||
val = 0;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void dec_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
static inline void __mod_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
mod_memcg_page_state(page, idx, -1);
|
||||
struct mem_cgroup_per_node *pn;
|
||||
|
||||
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
__mod_memcg_state(pn->memcg, idx, val);
|
||||
__this_cpu_add(pn->lruvec_stat->count[idx], val);
|
||||
}
|
||||
|
||||
static inline void mod_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
struct mem_cgroup_per_node *pn;
|
||||
|
||||
mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
mod_memcg_state(pn->memcg, idx, val);
|
||||
this_cpu_add(pn->lruvec_stat->count[idx], val);
|
||||
}
|
||||
|
||||
static inline void __mod_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
struct mem_cgroup_per_node *pn;
|
||||
|
||||
__mod_node_page_state(page_pgdat(page), idx, val);
|
||||
if (mem_cgroup_disabled() || !page->mem_cgroup)
|
||||
return;
|
||||
__mod_memcg_state(page->mem_cgroup, idx, val);
|
||||
pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
|
||||
__this_cpu_add(pn->lruvec_stat->count[idx], val);
|
||||
}
|
||||
|
||||
static inline void mod_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
struct mem_cgroup_per_node *pn;
|
||||
|
||||
mod_node_page_state(page_pgdat(page), idx, val);
|
||||
if (mem_cgroup_disabled() || !page->mem_cgroup)
|
||||
return;
|
||||
mod_memcg_state(page->mem_cgroup, idx, val);
|
||||
pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
|
||||
this_cpu_add(pn->lruvec_stat->count[idx], val);
|
||||
}
|
||||
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned);
|
||||
|
||||
static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
|
||||
enum vm_event_item idx)
|
||||
static inline void count_memcg_events(struct mem_cgroup *memcg,
|
||||
enum vm_event_item idx,
|
||||
unsigned long count)
|
||||
{
|
||||
if (!mem_cgroup_disabled())
|
||||
this_cpu_add(memcg->stat->events[idx], count);
|
||||
}
|
||||
|
||||
static inline void count_memcg_page_event(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
if (page->mem_cgroup)
|
||||
count_memcg_events(page->mem_cgroup, idx, 1);
|
||||
}
|
||||
|
||||
static inline void count_memcg_event_mm(struct mm_struct *mm,
|
||||
enum vm_event_item idx)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
@ -556,8 +648,11 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
|
||||
if (likely(memcg))
|
||||
if (likely(memcg)) {
|
||||
this_cpu_inc(memcg->stat->events[idx]);
|
||||
if (idx == OOM_KILL)
|
||||
cgroup_file_notify(&memcg->events_file);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
@ -675,6 +770,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
|
||||
{
|
||||
return true;
|
||||
@ -745,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx,
|
||||
int nr)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void mod_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx,
|
||||
int nr)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void inc_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void dec_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
static inline void __mod_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx,
|
||||
int nr)
|
||||
{
|
||||
}
|
||||
|
||||
@ -767,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page,
|
||||
{
|
||||
}
|
||||
|
||||
static inline void inc_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
return node_page_state(lruvec_pgdat(lruvec), idx);
|
||||
}
|
||||
|
||||
static inline void dec_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
static inline void __mod_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
|
||||
}
|
||||
|
||||
static inline void mod_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
|
||||
}
|
||||
|
||||
static inline void __mod_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
__mod_node_page_state(page_pgdat(page), idx, val);
|
||||
}
|
||||
|
||||
static inline void mod_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx, int val)
|
||||
{
|
||||
mod_node_page_state(page_pgdat(page), idx, val);
|
||||
}
|
||||
|
||||
static inline
|
||||
@ -789,12 +911,119 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void count_memcg_events(struct mem_cgroup *memcg,
|
||||
enum vm_event_item idx,
|
||||
unsigned long count)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void count_memcg_page_event(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
}
|
||||
|
||||
static inline
|
||||
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
|
||||
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
__mod_memcg_state(memcg, idx, 1);
|
||||
}
|
||||
|
||||
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
__mod_memcg_state(memcg, idx, -1);
|
||||
}
|
||||
|
||||
static inline void __inc_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
__mod_memcg_page_state(page, idx, 1);
|
||||
}
|
||||
|
||||
static inline void __dec_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
__mod_memcg_page_state(page, idx, -1);
|
||||
}
|
||||
|
||||
static inline void __inc_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
__mod_lruvec_state(lruvec, idx, 1);
|
||||
}
|
||||
|
||||
static inline void __dec_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
__mod_lruvec_state(lruvec, idx, -1);
|
||||
}
|
||||
|
||||
static inline void __inc_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
__mod_lruvec_page_state(page, idx, 1);
|
||||
}
|
||||
|
||||
static inline void __dec_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
__mod_lruvec_page_state(page, idx, -1);
|
||||
}
|
||||
|
||||
static inline void inc_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_state(memcg, idx, 1);
|
||||
}
|
||||
|
||||
static inline void dec_memcg_state(struct mem_cgroup *memcg,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_state(memcg, idx, -1);
|
||||
}
|
||||
|
||||
static inline void inc_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_page_state(page, idx, 1);
|
||||
}
|
||||
|
||||
static inline void dec_memcg_page_state(struct page *page,
|
||||
enum memcg_stat_item idx)
|
||||
{
|
||||
mod_memcg_page_state(page, idx, -1);
|
||||
}
|
||||
|
||||
static inline void inc_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
mod_lruvec_state(lruvec, idx, 1);
|
||||
}
|
||||
|
||||
static inline void dec_lruvec_state(struct lruvec *lruvec,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
mod_lruvec_state(lruvec, idx, -1);
|
||||
}
|
||||
|
||||
static inline void inc_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
mod_lruvec_page_state(page, idx, 1);
|
||||
}
|
||||
|
||||
static inline void dec_lruvec_page_state(struct page *page,
|
||||
enum node_stat_item idx)
|
||||
{
|
||||
mod_lruvec_page_state(page, idx, -1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
|
||||
@ -886,19 +1115,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
|
||||
return memcg ? memcg->kmemcg_id : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* memcg_kmem_update_page_stat - update kmem page state statistics
|
||||
* @page: the page
|
||||
* @idx: page state item to account
|
||||
* @val: number of pages (positive or negative)
|
||||
*/
|
||||
static inline void memcg_kmem_update_page_stat(struct page *page,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
if (memcg_kmem_enabled() && page->mem_cgroup)
|
||||
this_cpu_add(page->mem_cgroup->stat->count[idx], val);
|
||||
}
|
||||
|
||||
#else
|
||||
#define for_each_memcg_cache_index(_idx) \
|
||||
for (; NULL; )
|
||||
@ -921,10 +1137,6 @@ static inline void memcg_put_cache_ids(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void memcg_kmem_update_page_stat(struct page *page,
|
||||
enum memcg_stat_item idx, int val)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
|
||||
|
||||
#endif /* _LINUX_MEMCONTROL_H */
|
||||
|
@@ -14,6 +14,20 @@ struct memory_block;
struct resource;

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Return page for the valid pfn only if the page is online. All pfn
 * walkers which rely on the fully initialized page->flags and others
 * should use this rather than pfn_valid && pfn_to_page
 */
#define pfn_to_online_page(pfn)				\
({							\
	struct page *___page = NULL;			\
	unsigned long ___nr = pfn_to_section_nr(pfn);	\
							\
	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
		___page = pfn_to_page(pfn);		\
	___page;					\
})

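For pfn walkers the new helper replaces the bare pfn_valid()/pfn_to_page() pair. A minimal sketch of the intended calling pattern follows; the walker function and the zone check in it are illustrative, not part of this patch:

	/* Illustrative only: skip pfns whose struct page is not online yet. */
	static void walk_zone_pfns(struct zone *zone,
				   unsigned long start_pfn, unsigned long end_pfn)
	{
		unsigned long pfn;

		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
			struct page *page = pfn_to_online_page(pfn);

			if (!page)	/* hole, offline or not yet initialized */
				continue;
			if (page_zone(page) != zone)
				continue;
			/* page->flags can be relied on from here onwards */
		}
	}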
/*
|
||||
* Types for free bootmem stored in page->lru.next. These have to be in
|
||||
@ -101,6 +115,12 @@ extern void __online_page_free(struct page *page);
|
||||
extern int try_online_node(int nid);
|
||||
|
||||
extern bool memhp_auto_online;
|
||||
/* If movable_node boot option specified */
|
||||
extern bool movable_node_enabled;
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
{
|
||||
return movable_node_enabled;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
extern bool is_pageblock_removable_nolock(struct page *page);
|
||||
@ -109,9 +129,9 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long nr_pages);
|
||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||
|
||||
/* reasonably generic interface to expand the physical pages in a zone */
|
||||
extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long nr_pages);
|
||||
/* reasonably generic interface to expand the physical pages */
|
||||
extern int __add_pages(int nid, unsigned long start_pfn,
|
||||
unsigned long nr_pages, bool want_memblock);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
extern int memory_add_physaddr_to_nid(u64 start);
|
||||
@ -203,6 +223,14 @@ extern void set_zone_contiguous(struct zone *zone);
|
||||
extern void clear_zone_contiguous(struct zone *zone);
|
||||
|
||||
#else /* ! CONFIG_MEMORY_HOTPLUG */
|
||||
#define pfn_to_online_page(pfn) \
|
||||
({ \
|
||||
struct page *___page = NULL; \
|
||||
if (pfn_valid(pfn)) \
|
||||
___page = pfn_to_page(pfn); \
|
||||
___page; \
|
||||
})
|
||||
|
||||
/*
|
||||
* Stub functions for when hotplug is off
|
||||
*/
|
||||
@ -244,6 +272,10 @@ static inline void put_online_mems(void) {}
|
||||
static inline void mem_hotplug_begin(void) {}
|
||||
static inline void mem_hotplug_done(void) {}
|
||||
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
@ -274,18 +306,19 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
|
||||
void *arg, int (*func)(struct memory_block *, void *));
|
||||
extern int add_memory(int nid, u64 start, u64 size);
|
||||
extern int add_memory_resource(int nid, struct resource *resource, bool online);
|
||||
extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
|
||||
bool for_device);
|
||||
extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
|
||||
extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
|
||||
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long nr_pages);
|
||||
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
|
||||
extern bool is_memblock_offlined(struct memory_block *mem);
|
||||
extern void remove_memory(int nid, u64 start, u64 size);
|
||||
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
|
||||
extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
|
||||
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
|
||||
unsigned long map_offset);
|
||||
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
|
||||
unsigned long pnum);
|
||||
extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
|
||||
enum zone_type target, int *zone_shift);
|
||||
|
||||
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
|
||||
int online_type);
|
||||
extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn,
|
||||
unsigned long nr_pages);
|
||||
#endif /* __LINUX_MEMORY_HOTPLUG_H */
|
||||
|
@ -142,11 +142,10 @@ bool vma_policy_mof(struct vm_area_struct *vma);
|
||||
|
||||
extern void numa_default_policy(void);
|
||||
extern void numa_policy_init(void);
|
||||
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
|
||||
enum mpol_rebind_step step);
|
||||
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new);
|
||||
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
|
||||
|
||||
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
||||
extern int huge_node(struct vm_area_struct *vma,
|
||||
unsigned long addr, gfp_t gfp_flags,
|
||||
struct mempolicy **mpol, nodemask_t **nodemask);
|
||||
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
|
||||
@ -260,8 +259,7 @@ static inline void numa_default_policy(void)
|
||||
}
|
||||
|
||||
static inline void mpol_rebind_task(struct task_struct *tsk,
|
||||
const nodemask_t *new,
|
||||
enum mpol_rebind_step step)
|
||||
const nodemask_t *new)
|
||||
{
|
||||
}
|
||||
|
||||
@ -269,13 +267,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
||||
static inline int huge_node(struct vm_area_struct *vma,
|
||||
unsigned long addr, gfp_t gfp_flags,
|
||||
struct mempolicy **mpol, nodemask_t **nodemask)
|
||||
{
|
||||
*mpol = NULL;
|
||||
*nodemask = NULL;
|
||||
return node_zonelist(0, gfp_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
|
||||
|
@ -125,8 +125,6 @@ enum zone_stat_item {
|
||||
NR_ZONE_UNEVICTABLE,
|
||||
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
|
||||
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
|
||||
NR_SLAB_RECLAIMABLE,
|
||||
NR_SLAB_UNRECLAIMABLE,
|
||||
NR_PAGETABLE, /* used for pagetables */
|
||||
NR_KERNEL_STACK_KB, /* measured in KiB */
|
||||
/* Second 128 byte cacheline */
|
||||
@ -152,6 +150,8 @@ enum node_stat_item {
|
||||
NR_INACTIVE_FILE, /* " " " " " */
|
||||
NR_ACTIVE_FILE, /* " " " " " */
|
||||
NR_UNEVICTABLE, /* " " " " " */
|
||||
NR_SLAB_RECLAIMABLE,
|
||||
NR_SLAB_UNRECLAIMABLE,
|
||||
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
|
||||
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
|
||||
WORKINGSET_REFAULT,
|
||||
@ -532,6 +532,22 @@ static inline bool zone_is_empty(struct zone *zone)
|
||||
return zone->spanned_pages == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
|
||||
* intersection with the given zone
|
||||
*/
|
||||
static inline bool zone_intersects(struct zone *zone,
|
||||
unsigned long start_pfn, unsigned long nr_pages)
|
||||
{
|
||||
if (zone_is_empty(zone))
|
||||
return false;
|
||||
if (start_pfn >= zone_end_pfn(zone) ||
|
||||
start_pfn + nr_pages <= zone->zone_start_pfn)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The "priority" of VM scanning is how much of the queues we will scan in one
|
||||
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
|
||||
@ -772,7 +788,7 @@ enum memmap_context {
|
||||
MEMMAP_EARLY,
|
||||
MEMMAP_HOTPLUG,
|
||||
};
|
||||
extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
|
||||
extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long size);
|
||||
|
||||
extern void lruvec_init(struct lruvec *lruvec);
|
||||
@ -1144,9 +1160,10 @@ extern unsigned long usemap_size(void);
|
||||
*/
|
||||
#define SECTION_MARKED_PRESENT (1UL<<0)
|
||||
#define SECTION_HAS_MEM_MAP (1UL<<1)
|
||||
#define SECTION_MAP_LAST_BIT (1UL<<2)
|
||||
#define SECTION_IS_ONLINE (1UL<<2)
|
||||
#define SECTION_MAP_LAST_BIT (1UL<<3)
|
||||
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
|
||||
#define SECTION_NID_SHIFT 2
|
||||
#define SECTION_NID_SHIFT 3
|
||||
|
||||
static inline struct page *__section_mem_map_addr(struct mem_section *section)
|
||||
{
|
||||
@ -1175,11 +1192,30 @@ static inline int valid_section_nr(unsigned long nr)
|
||||
return valid_section(__nr_to_section(nr));
|
||||
}
|
||||
|
||||
static inline int online_section(struct mem_section *section)
|
||||
{
|
||||
return (section && (section->section_mem_map & SECTION_IS_ONLINE));
|
||||
}
|
||||
|
||||
static inline int online_section_nr(unsigned long nr)
|
||||
{
|
||||
return online_section(__nr_to_section(nr));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
|
||||
{
|
||||
return __nr_to_section(pfn_to_section_nr(pfn));
|
||||
}
|
||||
|
||||
extern int __highest_present_section_nr;
|
||||
|
||||
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
|
||||
static inline int pfn_valid(unsigned long pfn)
|
||||
{
|
||||
@ -1251,10 +1287,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
|
||||
#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
|
||||
/*
|
||||
* pfn_valid() is meant to be able to tell if a given PFN has valid memmap
|
||||
* associated with it or not. In FLATMEM, it is expected that holes always
|
||||
* have valid memmap as long as there is valid PFNs either side of the hole.
|
||||
* In SPARSEMEM, it is assumed that a valid section has a memmap for the
|
||||
* entire section.
|
||||
* associated with it or not. This means that a struct page exists for this
|
||||
* pfn. The caller cannot assume the page is fully initialized in general.
|
||||
* Hotplugable pages might not have been onlined yet. pfn_to_online_page()
|
||||
* will ensure the struct page is fully online and initialized. Special pages
|
||||
* (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
|
||||
*
|
||||
* In FLATMEM, it is expected that holes always have valid memmap as long as
|
||||
* there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
|
||||
* that a valid section has a memmap for the entire section.
|
||||
*
|
||||
* However, an ARM, and maybe other embedded architectures in the future
|
||||
* free memmap backing holes to save memory on the assumption the memmap is
|
||||
|
@ -30,9 +30,38 @@ struct memory_block;
|
||||
extern struct node *node_devices[];
|
||||
typedef void (*node_registration_func_t)(struct node *);
|
||||
|
||||
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
|
||||
extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages);
|
||||
#else
|
||||
static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void unregister_node(struct node *node);
|
||||
#ifdef CONFIG_NUMA
|
||||
extern int register_one_node(int nid);
|
||||
/* Core of the node registration - only memory hotplug should use this */
|
||||
extern int __register_one_node(int nid);
|
||||
|
||||
/* Registers an online node */
|
||||
static inline int register_one_node(int nid)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (node_online(nid)) {
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
|
||||
error = __register_one_node(nid);
|
||||
if (error)
|
||||
return error;
|
||||
/* link memory sections under this node */
|
||||
error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
extern void unregister_one_node(int nid);
|
||||
extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
|
||||
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
|
||||
@ -46,6 +75,10 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
|
||||
node_registration_func_t unregister);
|
||||
#endif
|
||||
#else
|
||||
static inline int __register_one_node(int nid)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int register_one_node(int nid)
|
||||
{
|
||||
return 0;
|
||||
|
@@ -387,11 +387,7 @@ enum node_states {
#else
	N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
#ifdef CONFIG_MOVABLE_NODE
	N_MEMORY,		/* The node has memory(regular, high, movable) */
#else
	N_MEMORY = N_HIGH_MEMORY,
#endif
	N_CPU,		/* The node has one or more cpus */
	NR_NODE_STATES
};
@@ -326,11 +326,14 @@ PAGEFLAG_FALSE(HighMem)
#ifdef CONFIG_SWAP
static __always_inline int PageSwapCache(struct page *page)
{
#ifdef CONFIG_THP_SWAP
	page = compound_head(page);
#endif
	return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);

}
SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
#else
PAGEFLAG_FALSE(SwapCache)
#endif
@@ -904,7 +904,7 @@ struct task_struct {
#ifdef CONFIG_NUMA
	/* Protected by alloc_lock: */
	struct mempolicy *mempolicy;
	short il_next;
	short il_prev;
	short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
include/linux/set_memory.h | 20 (new file)
@@ -0,0 +1,20 @@
/*
 * Copyright 2017, Michael Ellerman, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 */
#ifndef _LINUX_SET_MEMORY_H_
#define _LINUX_SET_MEMORY_H_

#ifdef CONFIG_ARCH_HAS_SET_MEMORY
#include <asm/set_memory.h>
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif

#endif /* _LINUX_SET_MEMORY_H_ */
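With this wrapper header, callers no longer need to guard the asm include themselves. A minimal sketch of the consuming pattern, assuming an illustrative caller that is not part of the patch:

	#include <linux/set_memory.h>	/* no CONFIG_ARCH_HAS_SET_MEMORY ifdef needed */

	static int protect_text(unsigned long addr, int npages)
	{
		/*
		 * On architectures without CONFIG_ARCH_HAS_SET_MEMORY the
		 * stub above is a no-op returning 0, so no fallback is needed.
		 */
		return set_memory_ro(addr, npages);
	}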
@ -41,12 +41,31 @@ struct kmem_cache_cpu {
|
||||
void **freelist; /* Pointer to next available object */
|
||||
unsigned long tid; /* Globally unique transaction id */
|
||||
struct page *page; /* The slab from which we are allocating */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
struct page *partial; /* Partially allocated frozen slabs */
|
||||
#endif
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
unsigned stat[NR_SLUB_STAT_ITEMS];
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_percpu_partial(c) ((c)->partial)
|
||||
|
||||
#define slub_set_percpu_partial(c, p) \
|
||||
({ \
|
||||
slub_percpu_partial(c) = (p)->next; \
|
||||
})
|
||||
|
||||
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
|
||||
#else
|
||||
#define slub_percpu_partial(c) NULL
|
||||
|
||||
#define slub_set_percpu_partial(c, p)
|
||||
|
||||
#define slub_percpu_partial_read_once(c) NULL
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
/*
|
||||
* Word size structure that can be atomically updated or read and that
|
||||
* contains both the order and the number of objects that a slab of the
|
||||
@ -67,7 +86,9 @@ struct kmem_cache {
|
||||
int size; /* The size of an object including meta data */
|
||||
int object_size; /* The size of an object without meta data */
|
||||
int offset; /* Free pointer offset. */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
int cpu_partial; /* Number of per cpu partial objects to keep around */
|
||||
#endif
|
||||
struct kmem_cache_order_objects oo;
|
||||
|
||||
/* Allocation and freeing of slabs */
|
||||
@ -79,9 +100,9 @@ struct kmem_cache {
|
||||
int inuse; /* Offset to metadata */
|
||||
int align; /* Alignment */
|
||||
int reserved; /* Reserved bytes at the end of slabs */
|
||||
int red_left_pad; /* Left redzone padding size */
|
||||
const char *name; /* Name (only for display!) */
|
||||
struct list_head list; /* List of slab caches */
|
||||
int red_left_pad; /* Left redzone padding size */
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj; /* For sysfs */
|
||||
struct work_struct kobj_remove_work;
|
||||
@ -112,6 +133,17 @@ struct kmem_cache {
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_cpu_partial(s) ((s)->cpu_partial)
|
||||
#define slub_set_cpu_partial(s, n) \
|
||||
({ \
|
||||
slub_cpu_partial(s) = (n); \
|
||||
})
|
||||
#else
|
||||
#define slub_cpu_partial(s) (0)
|
||||
#define slub_set_cpu_partial(s, n)
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
#define SLAB_SUPPORTS_SYSFS
|
||||
void sysfs_slab_release(struct kmem_cache *);
|
||||
|
@ -353,7 +353,7 @@ extern struct address_space *swapper_spaces[];
|
||||
>> SWAP_ADDRESS_SPACE_SHIFT])
|
||||
extern unsigned long total_swapcache_pages(void);
|
||||
extern void show_swap_cache_info(void);
|
||||
extern int add_to_swap(struct page *, struct list_head *list);
|
||||
extern int add_to_swap(struct page *page);
|
||||
extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
|
||||
extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
|
||||
extern void __delete_from_swap_cache(struct page *);
|
||||
@ -386,15 +386,15 @@ static inline long get_nr_swap_pages(void)
|
||||
}
|
||||
|
||||
extern void si_swapinfo(struct sysinfo *);
|
||||
extern swp_entry_t get_swap_page(void);
|
||||
extern swp_entry_t get_swap_page(struct page *page);
|
||||
extern void put_swap_page(struct page *page, swp_entry_t entry);
|
||||
extern swp_entry_t get_swap_page_of_type(int);
|
||||
extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
|
||||
extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
|
||||
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
|
||||
extern void swap_shmem_alloc(swp_entry_t);
|
||||
extern int swap_duplicate(swp_entry_t);
|
||||
extern int swapcache_prepare(swp_entry_t);
|
||||
extern void swap_free(swp_entry_t);
|
||||
extern void swapcache_free(swp_entry_t);
|
||||
extern void swapcache_free_entries(swp_entry_t *entries, int n);
|
||||
extern int free_swap_and_cache(swp_entry_t);
|
||||
extern int swap_type_of(dev_t, sector_t, struct block_device **);
|
||||
@ -453,7 +453,7 @@ static inline void swap_free(swp_entry_t swp)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void swapcache_free(swp_entry_t swp)
|
||||
static inline void put_swap_page(struct page *page, swp_entry_t swp)
|
||||
{
|
||||
}
|
||||
|
||||
@ -473,7 +473,7 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int add_to_swap(struct page *page, struct list_head *list)
|
||||
static inline int add_to_swap(struct page *page)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -515,7 +515,7 @@ static inline int try_to_free_swap(struct page *page)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline swp_entry_t get_swap_page(void)
|
||||
static inline swp_entry_t get_swap_page(struct page *page)
|
||||
{
|
||||
swp_entry_t entry;
|
||||
entry.val = 0;
|
||||
@ -548,7 +548,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
|
||||
#ifdef CONFIG_MEMCG_SWAP
|
||||
extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
|
||||
extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
|
||||
extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
|
||||
extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
|
||||
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
|
||||
extern bool mem_cgroup_swap_full(struct page *page);
|
||||
#else
|
||||
@ -562,7 +562,8 @@ static inline int mem_cgroup_try_charge_swap(struct page *page,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
|
||||
static inline void mem_cgroup_uncharge_swap(swp_entry_t entry,
|
||||
unsigned int nr_pages)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,8 @@
|
||||
|
||||
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
|
||||
unsigned short old, unsigned short new);
|
||||
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
|
||||
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
|
||||
unsigned int nr_ents);
|
||||
extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
|
||||
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
|
||||
extern void swap_cgroup_swapoff(int type);
|
||||
@ -15,7 +16,8 @@ extern void swap_cgroup_swapoff(int type);
|
||||
#else
|
||||
|
||||
static inline
|
||||
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
|
||||
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
|
||||
unsigned int nr_ents)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
		PAGEOUTRUN, PGROTATED,
		DROP_PAGECACHE, DROP_SLAB,
		OOM_KILL,
#ifdef CONFIG_NUMA_BALANCING
		NUMA_PTE_UPDATES,
		NUMA_HUGE_PTE_UPDATES,
@ -3,7 +3,6 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/vm_event_item.h>
|
||||
#include <linux/atomic.h>
|
||||
|
@ -42,6 +42,7 @@
|
||||
#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
|
||||
#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */
|
||||
#define NFS_SUPER_MAGIC 0x6969
|
||||
#define OCFS2_SUPER_MAGIC 0x7461636f
|
||||
#define OPENPROM_SUPER_MAGIC 0x9fa1
|
||||
#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */
|
||||
#define QNX6_SUPER_MAGIC 0x68191122 /* qnx6 fs detection */
|
||||
|
@ -24,13 +24,6 @@ enum {
|
||||
MPOL_MAX, /* always last member of enum */
|
||||
};
|
||||
|
||||
enum mpol_rebind_step {
|
||||
MPOL_REBIND_ONCE, /* do rebind work at once(not by two step) */
|
||||
MPOL_REBIND_STEP1, /* first step(set all the newly nodes) */
|
||||
MPOL_REBIND_STEP2, /* second step(clean all the disallowed nodes)*/
|
||||
MPOL_REBIND_NSTEP,
|
||||
};
|
||||
|
||||
/* Flags for set_mempolicy */
|
||||
#define MPOL_F_STATIC_NODES (1 << 15)
|
||||
#define MPOL_F_RELATIVE_NODES (1 << 14)
|
||||
@ -65,7 +58,6 @@ enum mpol_rebind_step {
|
||||
*/
|
||||
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
|
||||
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
|
||||
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
|
||||
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
|
||||
#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */
|
||||
|
||||
|
init/Kconfig | 14
@@ -1548,6 +1548,20 @@ config SLOB

endchoice

config SLAB_MERGE_DEFAULT
	bool "Allow slab caches to be merged"
	default y
	help
	  For reduced kernel memory fragmentation, slab caches can be
	  merged when they share the same size and other characteristics.
	  This carries a risk of kernel heap overflows being able to
	  overwrite objects from merged caches (and more easily control
	  cache layout), which makes such heap attacks easier to exploit
	  by attackers. By keeping caches unmerged, these kinds of exploits
	  can usually only damage objects in the same cache. To disable
	  merging at runtime, "slab_nomerge" can be passed on the kernel
	  command line.

config SLAB_FREELIST_RANDOM
	default n
	depends on SLAB || SLUB
@@ -1038,40 +1038,25 @@ static void cpuset_post_attach(void)
 * @tsk: the task to change
 * @newmems: new nodes that the task will be set
 *
 * In order to avoid seeing no nodes if the old and new nodes are disjoint,
 * we structure updates as setting all new allowed nodes, then clearing newly
 * disallowed ones.
 * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
 * and rebind an eventual tasks' mempolicy. If the task is allocating in
 * parallel, it might temporarily see an empty intersection, which results in
 * a seqlock check and retry before OOM or allocation failure.
 */
static void cpuset_change_task_nodemask(struct task_struct *tsk,
					nodemask_t *newmems)
{
	bool need_loop;

	task_lock(tsk);
	/*
	 * Determine if a loop is necessary if another thread is doing
	 * read_mems_allowed_begin(). If at least one node remains unchanged and
	 * tsk does not have a mempolicy, then an empty nodemask will not be
	 * possible when mems_allowed is larger than a word.
	 */
	need_loop = task_has_mempolicy(tsk) ||
		    !nodes_intersects(*newmems, tsk->mems_allowed);

	if (need_loop) {
		local_irq_disable();
		write_seqcount_begin(&tsk->mems_allowed_seq);
	}
	local_irq_disable();
	write_seqcount_begin(&tsk->mems_allowed_seq);

	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);

	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
	mpol_rebind_task(tsk, newmems);
	tsk->mems_allowed = *newmems;

	if (need_loop) {
		write_seqcount_end(&tsk->mems_allowed_seq);
		local_irq_enable();
	}
	write_seqcount_end(&tsk->mems_allowed_seq);
	local_irq_enable();

	task_unlock(tsk);
}
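The write side above pairs with the existing retry-based read side. A sketch of the reader pattern, assuming the existing read_mems_allowed_begin()/read_mems_allowed_retry() helpers from include/linux/cpuset.h and using the reworked __alloc_pages_nodemask() signature from this series; the wrapper function itself is illustrative:

	/* Sketch of a reader protected by mems_allowed_seq. */
	static struct page *alloc_respecting_mems_allowed(gfp_t gfp, unsigned int order)
	{
		struct page *page;
		unsigned int cookie;

		do {
			cookie = read_mems_allowed_begin();
			page = __alloc_pages_nodemask(gfp, order, numa_node_id(),
						      &current->mems_allowed);
		} while (!page && read_mems_allowed_retry(cookie));

		return page;
	}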
@ -51,7 +51,6 @@
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/fs_struct.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <trace/events/sched.h>
|
||||
|
@ -69,7 +69,7 @@ static inline int init_kernel_text(unsigned long addr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int core_kernel_text(unsigned long addr)
|
||||
int notrace core_kernel_text(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext &&
|
||||
addr < (unsigned long)_etext)
|
||||
|
@ -326,8 +326,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
|
||||
}
|
||||
|
||||
/* All stack pages belong to the same memcg. */
|
||||
memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
|
||||
account * (THREAD_SIZE / 1024));
|
||||
mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
|
||||
account * (THREAD_SIZE / 1024));
|
||||
} else {
|
||||
/*
|
||||
* All stack pages are in the same zone and belong to the
|
||||
@ -338,8 +338,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
|
||||
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
|
||||
THREAD_SIZE / 1024 * account);
|
||||
|
||||
memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
|
||||
account * (THREAD_SIZE / 1024));
|
||||
mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
|
||||
account * (THREAD_SIZE / 1024));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -193,7 +193,8 @@ void __init __pv_init_lock_hash(void)
	 */
	pv_lock_hash = alloc_large_system_hash("PV qspinlock",
					       sizeof(struct pv_hash_entry),
					       pv_hash_size, 0, HASH_EARLY,
					       pv_hash_size, 0,
					       HASH_EARLY | HASH_ZERO,
					       &pv_lock_hash_bits, NULL,
					       pv_hash_size, pv_hash_size);
}
@@ -358,7 +358,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
		goto err_pfn_remap;

	mem_hotplug_begin();
	error = arch_add_memory(nid, align_start, align_size, true);
	error = arch_add_memory(nid, align_start, align_size, false);
	if (!error)
		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
					align_start >> PAGE_SHIFT,
					align_size >> PAGE_SHIFT);
	mem_hotplug_done();
	if (error)
		goto err_add_memory;
@ -49,9 +49,7 @@
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#ifdef CONFIG_STRICT_MODULE_RWX
|
||||
#include <asm/set_memory.h>
|
||||
#endif
|
||||
#include <linux/set_memory.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <linux/license.h>
|
||||
#include <asm/sections.h>
|
||||
|
@ -575,16 +575,13 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
*/
void __init pidhash_init(void)
{
unsigned int i, pidhash_size;
unsigned int pidhash_size;

pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
HASH_EARLY | HASH_SMALL,
HASH_EARLY | HASH_SMALL | HASH_ZERO,
&pidhash_shift, NULL,
0, 4096);
pidhash_size = 1U << pidhash_shift;

for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}

void __init pidmap_init(void)
@ -30,15 +30,13 @@
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/ktime.h>
#include <linux/set_memory.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
#include <asm/set_memory.h>
#endif

#include "power.h"
38	mm/Kconfig
@ -149,32 +149,6 @@ config NO_BOOTMEM
config MEMORY_ISOLATION
bool

config MOVABLE_NODE
bool "Enable to assign a node which has only movable memory"
depends on HAVE_MEMBLOCK
depends on NO_BOOTMEM
depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG
depends on NUMA
default n
help
Allow a node to have only movable memory. Pages used by the kernel,
such as direct mapping pages cannot be migrated. So the corresponding
memory device cannot be hotplugged. This option allows the following
two things:
- When the system is booting, node full of hotpluggable memory can
be arranged to have only movable memory so that the whole node can
be hot-removed. (need movable_node boot option specified).
- After the system is up, the option allows users to online all the
memory of a node as movable memory so that the whole node can be
hot-removed.

Users who don't use the memory hotplug feature are fine with this
option on since they don't specify movable_node boot option or they
don't online memory as movable.

Say Y here if you want to hotplug a whole node.
Say N here if you want kernel to use memory on all nodes evenly.

#
# Only be set on architectures that have completely implemented memory hotplug
# feature. If you are not sure, don't touch it.
@ -446,6 +420,18 @@ choice
benefit.
endchoice

config ARCH_WANTS_THP_SWAP
def_bool n

config THP_SWAP
def_bool y
depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP
help
Swap transparent huge pages in one piece, without splitting.
XXX: For now this only does clustered swap space allocation.

For selection by architectures with reasonable THP sizes.

config TRANSPARENT_HUGE_PAGECACHE
def_bool y
depends on TRANSPARENT_HUGEPAGE
@ -236,10 +236,9 @@ static void __reset_isolation_suitable(struct zone *zone)

cond_resched();

if (!pfn_valid(pfn))
page = pfn_to_online_page(pfn);
if (!page)
continue;

page = pfn_to_page(pfn);
if (zone != page_zone(page))
continue;
@ -2265,7 +2265,7 @@ int filemap_fault(struct vm_fault *vmf)
/* No page in the page cache at all */
do_sync_mmap_readahead(vmf->vma, ra, file, offset);
count_vm_event(PGMAJFAULT);
mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
retry_find:
page = find_get_page(mapping, offset);
203	mm/gup.c
@ -208,68 +208,16 @@ no_page:
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* follow_page_mask - look up a page descriptor from a user-virtual address
|
||||
* @vma: vm_area_struct mapping @address
|
||||
* @address: virtual address to look up
|
||||
* @flags: flags modifying lookup behaviour
|
||||
* @page_mask: on output, *page_mask is set according to the size of the page
|
||||
*
|
||||
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
|
||||
*
|
||||
* Returns the mapped (struct page *), %NULL if no mapping exists, or
|
||||
* an error pointer if there is a mapping to something not represented
|
||||
* by a page descriptor (see also vm_normal_page()).
|
||||
*/
|
||||
struct page *follow_page_mask(struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned int flags,
|
||||
unsigned int *page_mask)
|
||||
static struct page *follow_pmd_mask(struct vm_area_struct *vma,
|
||||
unsigned long address, pud_t *pudp,
|
||||
unsigned int flags, unsigned int *page_mask)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
spinlock_t *ptl;
|
||||
struct page *page;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
*page_mask = 0;
|
||||
|
||||
page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
|
||||
if (!IS_ERR(page)) {
|
||||
BUG_ON(flags & FOLL_GET);
|
||||
return page;
|
||||
}
|
||||
|
||||
pgd = pgd_offset(mm, address);
|
||||
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
|
||||
return no_page_table(vma, flags);
|
||||
p4d = p4d_offset(pgd, address);
|
||||
if (p4d_none(*p4d))
|
||||
return no_page_table(vma, flags);
|
||||
BUILD_BUG_ON(p4d_huge(*p4d));
|
||||
if (unlikely(p4d_bad(*p4d)))
|
||||
return no_page_table(vma, flags);
|
||||
pud = pud_offset(p4d, address);
|
||||
if (pud_none(*pud))
|
||||
return no_page_table(vma, flags);
|
||||
if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
|
||||
page = follow_huge_pud(mm, address, pud, flags);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (pud_devmap(*pud)) {
|
||||
ptl = pud_lock(mm, pud);
|
||||
page = follow_devmap_pud(vma, address, pud, flags);
|
||||
spin_unlock(ptl);
|
||||
if (page)
|
||||
return page;
|
||||
}
|
||||
if (unlikely(pud_bad(*pud)))
|
||||
return no_page_table(vma, flags);
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
pmd = pmd_offset(pudp, address);
|
||||
if (pmd_none(*pmd))
|
||||
return no_page_table(vma, flags);
|
||||
if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
|
||||
@ -278,6 +226,14 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
|
||||
page = follow_huge_pd(vma, address,
|
||||
__hugepd(pmd_val(*pmd)), flags,
|
||||
PMD_SHIFT);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (pmd_devmap(*pmd)) {
|
||||
ptl = pmd_lock(mm, pmd);
|
||||
page = follow_devmap_pmd(vma, address, pmd, flags);
|
||||
@ -319,13 +275,131 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
|
||||
return ret ? ERR_PTR(ret) :
|
||||
follow_page_pte(vma, address, pmd, flags);
|
||||
}
|
||||
|
||||
page = follow_trans_huge_pmd(vma, address, pmd, flags);
|
||||
spin_unlock(ptl);
|
||||
*page_mask = HPAGE_PMD_NR - 1;
|
||||
return page;
|
||||
}
|
||||
|
||||
|
||||
static struct page *follow_pud_mask(struct vm_area_struct *vma,
|
||||
unsigned long address, p4d_t *p4dp,
|
||||
unsigned int flags, unsigned int *page_mask)
|
||||
{
|
||||
pud_t *pud;
|
||||
spinlock_t *ptl;
|
||||
struct page *page;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
pud = pud_offset(p4dp, address);
|
||||
if (pud_none(*pud))
|
||||
return no_page_table(vma, flags);
|
||||
if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
|
||||
page = follow_huge_pud(mm, address, pud, flags);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (is_hugepd(__hugepd(pud_val(*pud)))) {
|
||||
page = follow_huge_pd(vma, address,
|
||||
__hugepd(pud_val(*pud)), flags,
|
||||
PUD_SHIFT);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (pud_devmap(*pud)) {
|
||||
ptl = pud_lock(mm, pud);
|
||||
page = follow_devmap_pud(vma, address, pud, flags);
|
||||
spin_unlock(ptl);
|
||||
if (page)
|
||||
return page;
|
||||
}
|
||||
if (unlikely(pud_bad(*pud)))
|
||||
return no_page_table(vma, flags);
|
||||
|
||||
return follow_pmd_mask(vma, address, pud, flags, page_mask);
|
||||
}
|
||||
|
||||
|
||||
static struct page *follow_p4d_mask(struct vm_area_struct *vma,
|
||||
unsigned long address, pgd_t *pgdp,
|
||||
unsigned int flags, unsigned int *page_mask)
|
||||
{
|
||||
p4d_t *p4d;
|
||||
struct page *page;
|
||||
|
||||
p4d = p4d_offset(pgdp, address);
|
||||
if (p4d_none(*p4d))
|
||||
return no_page_table(vma, flags);
|
||||
BUILD_BUG_ON(p4d_huge(*p4d));
|
||||
if (unlikely(p4d_bad(*p4d)))
|
||||
return no_page_table(vma, flags);
|
||||
|
||||
if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
|
||||
page = follow_huge_pd(vma, address,
|
||||
__hugepd(p4d_val(*p4d)), flags,
|
||||
P4D_SHIFT);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
return follow_pud_mask(vma, address, p4d, flags, page_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* follow_page_mask - look up a page descriptor from a user-virtual address
|
||||
* @vma: vm_area_struct mapping @address
|
||||
* @address: virtual address to look up
|
||||
* @flags: flags modifying lookup behaviour
|
||||
* @page_mask: on output, *page_mask is set according to the size of the page
|
||||
*
|
||||
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
|
||||
*
|
||||
* Returns the mapped (struct page *), %NULL if no mapping exists, or
|
||||
* an error pointer if there is a mapping to something not represented
|
||||
* by a page descriptor (see also vm_normal_page()).
|
||||
*/
|
||||
struct page *follow_page_mask(struct vm_area_struct *vma,
|
||||
unsigned long address, unsigned int flags,
|
||||
unsigned int *page_mask)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
struct page *page;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
*page_mask = 0;
|
||||
|
||||
/* make this handle hugepd */
|
||||
page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
|
||||
if (!IS_ERR(page)) {
|
||||
BUG_ON(flags & FOLL_GET);
|
||||
return page;
|
||||
}
|
||||
|
||||
pgd = pgd_offset(mm, address);
|
||||
|
||||
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
|
||||
return no_page_table(vma, flags);
|
||||
|
||||
if (pgd_huge(*pgd)) {
|
||||
page = follow_huge_pgd(mm, address, pgd, flags);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
|
||||
page = follow_huge_pd(vma, address,
|
||||
__hugepd(pgd_val(*pgd)), flags,
|
||||
PGDIR_SHIFT);
|
||||
if (page)
|
||||
return page;
|
||||
return no_page_table(vma, flags);
|
||||
}
|
||||
|
||||
return follow_p4d_mask(vma, address, pgd, flags, page_mask);
|
||||
}
|
||||
|
||||
static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
||||
unsigned int gup_flags, struct vm_area_struct **vma,
|
||||
struct page **page)
|
||||
@ -1349,16 +1423,15 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
|
||||
return __gup_device_huge_pmd(orig, addr, end, pages, nr);
|
||||
|
||||
refs = 0;
|
||||
head = pmd_page(orig);
|
||||
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
head = compound_head(pmd_page(orig));
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
@ -1388,16 +1461,15 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
|
||||
return __gup_device_huge_pud(orig, addr, end, pages, nr);
|
||||
|
||||
refs = 0;
|
||||
head = pud_page(orig);
|
||||
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
head = compound_head(pud_page(orig));
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
@ -1426,16 +1498,15 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
|
||||
|
||||
BUILD_BUG_ON(pgd_devmap(orig));
|
||||
refs = 0;
|
||||
head = pgd_page(orig);
|
||||
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
|
||||
page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
head = compound_head(pgd_page(orig));
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
|
@ -1575,8 +1575,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
get_page(page);
|
||||
spin_unlock(ptl);
|
||||
split_huge_page(page);
|
||||
put_page(page);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto out_unlocked;
|
||||
}
|
||||
|
||||
@ -2203,7 +2203,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
|
||||
* atomic_set() here would be safe on all archs (and not only on x86),
|
||||
* it's safer to use atomic_inc()/atomic_add().
|
||||
*/
|
||||
if (PageAnon(head)) {
|
||||
if (PageAnon(head) && !PageSwapCache(head)) {
|
||||
page_ref_inc(page_tail);
|
||||
} else {
|
||||
/* Additional pin to radix tree */
|
||||
@ -2214,6 +2214,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
|
||||
page_tail->flags |= (head->flags &
|
||||
((1L << PG_referenced) |
|
||||
(1L << PG_swapbacked) |
|
||||
(1L << PG_swapcache) |
|
||||
(1L << PG_mlocked) |
|
||||
(1L << PG_uptodate) |
|
||||
(1L << PG_active) |
|
||||
@ -2276,7 +2277,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
ClearPageCompound(head);
|
||||
/* See comment in __split_huge_page_tail() */
|
||||
if (PageAnon(head)) {
|
||||
page_ref_inc(head);
|
||||
/* Additional pin to radix tree of swap cache */
|
||||
if (PageSwapCache(head))
|
||||
page_ref_add(head, 2);
|
||||
else
|
||||
page_ref_inc(head);
|
||||
} else {
|
||||
/* Additional pin to radix tree */
|
||||
page_ref_add(head, 2);
|
||||
@ -2385,6 +2390,21 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Racy check whether the huge page can be split */
|
||||
bool can_split_huge_page(struct page *page, int *pextra_pins)
|
||||
{
|
||||
int extra_pins;
|
||||
|
||||
/* Additional pins from radix tree */
|
||||
if (PageAnon(page))
|
||||
extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
|
||||
else
|
||||
extra_pins = HPAGE_PMD_NR;
|
||||
if (pextra_pins)
|
||||
*pextra_pins = extra_pins;
|
||||
return total_mapcount(page) == page_count(page) - extra_pins - 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function splits huge page into normal pages. @page can point to any
|
||||
* subpage of huge page to split. Split doesn't change the position of @page.
|
||||
@ -2432,7 +2452,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
extra_pins = 0;
|
||||
mapping = NULL;
|
||||
anon_vma_lock_write(anon_vma);
|
||||
} else {
|
||||
@ -2444,8 +2463,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Addidional pins from radix tree */
|
||||
extra_pins = HPAGE_PMD_NR;
|
||||
anon_vma = NULL;
|
||||
i_mmap_lock_read(mapping);
|
||||
}
|
||||
@ -2454,7 +2471,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
* Racy check if we can split the page, before freeze_page() will
|
||||
* split PMDs
|
||||
*/
|
||||
if (total_mapcount(head) != page_count(head) - extra_pins - 1) {
|
||||
if (!can_split_huge_page(head, &extra_pins)) {
|
||||
ret = -EBUSY;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
98	mm/hugetlb.c
@ -867,7 +867,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
|
||||
h->free_huge_pages_node[nid]++;
|
||||
}
|
||||
|
||||
static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
|
||||
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
@ -887,6 +887,22 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
|
||||
return page;
|
||||
}
|
||||
|
||||
static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
|
||||
{
|
||||
struct page *page;
|
||||
int node;
|
||||
|
||||
if (nid != NUMA_NO_NODE)
|
||||
return dequeue_huge_page_node_exact(h, nid);
|
||||
|
||||
for_each_online_node(node) {
|
||||
page = dequeue_huge_page_node_exact(h, node);
|
||||
if (page)
|
||||
return page;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Movability of hugepages depends on migration support. */
|
||||
static inline gfp_t htlb_alloc_mask(struct hstate *h)
|
||||
{
|
||||
@ -904,6 +920,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
|
||||
struct page *page = NULL;
|
||||
struct mempolicy *mpol;
|
||||
nodemask_t *nodemask;
|
||||
gfp_t gfp_mask;
|
||||
int nid;
|
||||
struct zonelist *zonelist;
|
||||
struct zone *zone;
|
||||
struct zoneref *z;
|
||||
@ -924,12 +942,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
|
||||
|
||||
retry_cpuset:
|
||||
cpuset_mems_cookie = read_mems_allowed_begin();
|
||||
zonelist = huge_zonelist(vma, address,
|
||||
htlb_alloc_mask(h), &mpol, &nodemask);
|
||||
gfp_mask = htlb_alloc_mask(h);
|
||||
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
|
||||
zonelist = node_zonelist(nid, gfp_mask);
|
||||
|
||||
for_each_zone_zonelist_nodemask(zone, z, zonelist,
|
||||
MAX_NR_ZONES - 1, nodemask) {
|
||||
if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
|
||||
if (cpuset_zone_allowed(zone, gfp_mask)) {
|
||||
page = dequeue_huge_page_node(h, zone_to_nid(zone));
|
||||
if (page) {
|
||||
if (avoid_reserve)
|
||||
@ -1024,9 +1043,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
|
||||
((node = hstate_next_node_to_free(hs, mask)) || 1); \
|
||||
nr_nodes--)
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) && \
|
||||
((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \
|
||||
defined(CONFIG_CMA))
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static void destroy_compound_gigantic_page(struct page *page,
|
||||
unsigned int order)
|
||||
{
|
||||
@ -1158,8 +1175,7 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool gigantic_page_supported(void) { return true; }
|
||||
#else
|
||||
#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
|
||||
static inline bool gigantic_page_supported(void) { return false; }
|
||||
static inline void free_gigantic_page(struct page *page, unsigned int order) { }
|
||||
static inline void destroy_compound_gigantic_page(struct page *page,
|
||||
@ -1545,13 +1561,13 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
|
||||
do {
|
||||
struct page *page;
|
||||
struct mempolicy *mpol;
|
||||
struct zonelist *zl;
|
||||
int nid;
|
||||
nodemask_t *nodemask;
|
||||
|
||||
cpuset_mems_cookie = read_mems_allowed_begin();
|
||||
zl = huge_zonelist(vma, addr, gfp, &mpol, &nodemask);
|
||||
nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
|
||||
mpol_cond_put(mpol);
|
||||
page = __alloc_pages_nodemask(gfp, order, zl, nodemask);
|
||||
page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
|
||||
if (page)
|
||||
return page;
|
||||
} while (read_mems_allowed_retry(cpuset_mems_cookie));
|
||||
@ -3185,17 +3201,17 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
|
||||
update_mmu_cache(vma, address, ptep);
|
||||
}
|
||||
|
||||
static int is_hugetlb_entry_migration(pte_t pte)
|
||||
bool is_hugetlb_entry_migration(pte_t pte)
|
||||
{
|
||||
swp_entry_t swp;
|
||||
|
||||
if (huge_pte_none(pte) || pte_present(pte))
|
||||
return 0;
|
||||
return false;
|
||||
swp = pte_to_swp_entry(pte);
|
||||
if (non_swap_entry(swp) && is_migration_entry(swp))
|
||||
return 1;
|
||||
return true;
|
||||
else
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int is_hugetlb_entry_hwpoisoned(pte_t pte)
|
||||
@ -3233,7 +3249,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
|
||||
for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
|
||||
spinlock_t *src_ptl, *dst_ptl;
|
||||
src_pte = huge_pte_offset(src, addr);
|
||||
src_pte = huge_pte_offset(src, addr, sz);
|
||||
if (!src_pte)
|
||||
continue;
|
||||
dst_pte = huge_pte_alloc(dst, addr, sz);
|
||||
@ -3263,9 +3279,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
*/
|
||||
make_migration_entry_read(&swp_entry);
|
||||
entry = swp_entry_to_pte(swp_entry);
|
||||
set_huge_pte_at(src, addr, src_pte, entry);
|
||||
set_huge_swap_pte_at(src, addr, src_pte,
|
||||
entry, sz);
|
||||
}
|
||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||
set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
|
||||
} else {
|
||||
if (cow) {
|
||||
huge_ptep_set_wrprotect(src, addr, src_pte);
|
||||
@ -3317,7 +3334,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
|
||||
address = start;
|
||||
for (; address < end; address += sz) {
|
||||
ptep = huge_pte_offset(mm, address);
|
||||
ptep = huge_pte_offset(mm, address, sz);
|
||||
if (!ptep)
|
||||
continue;
|
||||
|
||||
@ -3338,7 +3355,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
* unmapped and its refcount is dropped, so just clear pte here.
|
||||
*/
|
||||
if (unlikely(!pte_present(pte))) {
|
||||
huge_pte_clear(mm, address, ptep);
|
||||
huge_pte_clear(mm, address, ptep, sz);
|
||||
spin_unlock(ptl);
|
||||
continue;
|
||||
}
|
||||
@ -3535,7 +3552,8 @@ retry_avoidcopy:
|
||||
unmap_ref_private(mm, vma, old_page, address);
|
||||
BUG_ON(huge_pte_none(pte));
|
||||
spin_lock(ptl);
|
||||
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
|
||||
ptep = huge_pte_offset(mm, address & huge_page_mask(h),
|
||||
huge_page_size(h));
|
||||
if (likely(ptep &&
|
||||
pte_same(huge_ptep_get(ptep), pte)))
|
||||
goto retry_avoidcopy;
|
||||
@ -3574,7 +3592,8 @@ retry_avoidcopy:
|
||||
* before the page tables are altered
|
||||
*/
|
||||
spin_lock(ptl);
|
||||
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
|
||||
ptep = huge_pte_offset(mm, address & huge_page_mask(h),
|
||||
huge_page_size(h));
|
||||
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
|
||||
ClearPagePrivate(new_page);
|
||||
|
||||
@ -3861,7 +3880,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
|
||||
address &= huge_page_mask(h);
|
||||
|
||||
ptep = huge_pte_offset(mm, address);
|
||||
ptep = huge_pte_offset(mm, address, huge_page_size(h));
|
||||
if (ptep) {
|
||||
entry = huge_ptep_get(ptep);
|
||||
if (unlikely(is_hugetlb_entry_migration(entry))) {
|
||||
@ -4118,7 +4137,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
*
|
||||
* Note that page table lock is not held when pte is null.
|
||||
*/
|
||||
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
|
||||
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
|
||||
huge_page_size(h));
|
||||
if (pte)
|
||||
ptl = huge_pte_lock(h, mm, pte);
|
||||
absent = !pte || huge_pte_none(huge_ptep_get(pte));
|
||||
@ -4257,7 +4277,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
i_mmap_lock_write(vma->vm_file->f_mapping);
|
||||
for (; address < end; address += huge_page_size(h)) {
|
||||
spinlock_t *ptl;
|
||||
ptep = huge_pte_offset(mm, address);
|
||||
ptep = huge_pte_offset(mm, address, huge_page_size(h));
|
||||
if (!ptep)
|
||||
continue;
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
@ -4279,7 +4299,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
|
||||
make_migration_entry_read(&entry);
|
||||
newpte = swp_entry_to_pte(entry);
|
||||
set_huge_pte_at(mm, address, ptep, newpte);
|
||||
set_huge_swap_pte_at(mm, address, ptep,
|
||||
newpte, huge_page_size(h));
|
||||
pages++;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
@ -4521,7 +4542,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
|
||||
|
||||
saddr = page_table_shareable(svma, vma, addr, idx);
|
||||
if (saddr) {
|
||||
spte = huge_pte_offset(svma->vm_mm, saddr);
|
||||
spte = huge_pte_offset(svma->vm_mm, saddr,
|
||||
vma_mmu_pagesize(svma));
|
||||
if (spte) {
|
||||
get_page(virt_to_page(spte));
|
||||
break;
|
||||
@ -4617,7 +4639,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
@ -4652,6 +4675,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address,
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
struct page * __weak
|
||||
follow_huge_pd(struct vm_area_struct *vma,
|
||||
unsigned long address, hugepd_t hpd, int flags, int pdshift)
|
||||
{
|
||||
WARN(1, "hugepd follow called with no support for hugepage directory format\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct page * __weak
|
||||
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pmd_t *pmd, int flags)
|
||||
@ -4699,6 +4730,15 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
struct page * __weak
|
||||
follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
|
||||
{
|
||||
if (flags & FOLL_GET)
|
||||
return NULL;
|
||||
|
||||
return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
|
||||
/*
|
||||
|
136	mm/kmemleak.c
@ -150,7 +150,7 @@ struct kmemleak_scan_area {
|
||||
*/
|
||||
struct kmemleak_object {
|
||||
spinlock_t lock;
|
||||
unsigned long flags; /* object status flags */
|
||||
unsigned int flags; /* object status flags */
|
||||
struct list_head object_list;
|
||||
struct list_head gray_list;
|
||||
struct rb_node rb_node;
|
||||
@ -159,6 +159,8 @@ struct kmemleak_object {
|
||||
atomic_t use_count;
|
||||
unsigned long pointer;
|
||||
size_t size;
|
||||
/* pass surplus references to this pointer */
|
||||
unsigned long excess_ref;
|
||||
/* minimum number of a pointers found before it is considered leak */
|
||||
int min_count;
|
||||
/* the total number of pointers found pointing to this object */
|
||||
@ -253,7 +255,8 @@ enum {
|
||||
KMEMLEAK_NOT_LEAK,
|
||||
KMEMLEAK_IGNORE,
|
||||
KMEMLEAK_SCAN_AREA,
|
||||
KMEMLEAK_NO_SCAN
|
||||
KMEMLEAK_NO_SCAN,
|
||||
KMEMLEAK_SET_EXCESS_REF
|
||||
};
|
||||
|
||||
/*
|
||||
@ -262,9 +265,12 @@ enum {
|
||||
*/
|
||||
struct early_log {
|
||||
int op_type; /* kmemleak operation type */
|
||||
const void *ptr; /* allocated/freed memory block */
|
||||
size_t size; /* memory block size */
|
||||
int min_count; /* minimum reference count */
|
||||
const void *ptr; /* allocated/freed memory block */
|
||||
union {
|
||||
size_t size; /* memory block size */
|
||||
unsigned long excess_ref; /* surplus reference passing */
|
||||
};
|
||||
unsigned long trace[MAX_TRACE]; /* stack trace */
|
||||
unsigned int trace_len; /* stack trace length */
|
||||
};
|
||||
@ -393,7 +399,7 @@ static void dump_object_info(struct kmemleak_object *object)
|
||||
object->comm, object->pid, object->jiffies);
|
||||
pr_notice(" min_count = %d\n", object->min_count);
|
||||
pr_notice(" count = %d\n", object->count);
|
||||
pr_notice(" flags = 0x%lx\n", object->flags);
|
||||
pr_notice(" flags = 0x%x\n", object->flags);
|
||||
pr_notice(" checksum = %u\n", object->checksum);
|
||||
pr_notice(" backtrace:\n");
|
||||
print_stack_trace(&trace, 4);
|
||||
@ -562,6 +568,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
|
||||
object->flags = OBJECT_ALLOCATED;
|
||||
object->pointer = ptr;
|
||||
object->size = size;
|
||||
object->excess_ref = 0;
|
||||
object->min_count = min_count;
|
||||
object->count = 0; /* white color initially */
|
||||
object->jiffies = jiffies;
|
||||
@ -794,6 +801,30 @@ out:
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
/*
|
||||
* Any surplus references (object already gray) to 'ptr' are passed to
|
||||
* 'excess_ref'. This is used in the vmalloc() case where a pointer to
|
||||
* vm_struct may be used as an alternative reference to the vmalloc'ed object
|
||||
* (see free_thread_stack()).
|
||||
*/
|
||||
static void object_set_excess_ref(unsigned long ptr, unsigned long excess_ref)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kmemleak_object *object;
|
||||
|
||||
object = find_and_get_object(ptr, 0);
|
||||
if (!object) {
|
||||
kmemleak_warn("Setting excess_ref on unknown object at 0x%08lx\n",
|
||||
ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&object->lock, flags);
|
||||
object->excess_ref = excess_ref;
|
||||
spin_unlock_irqrestore(&object->lock, flags);
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the OBJECT_NO_SCAN flag for the object corresponding to the give
|
||||
* pointer. Such object will not be scanned by kmemleak but references to it
|
||||
@ -908,7 +939,7 @@ static void early_alloc_percpu(struct early_log *log)
|
||||
* @gfp: kmalloc() flags used for kmemleak internal memory allocations
|
||||
*
|
||||
* This function is called from the kernel allocators when a new object
|
||||
* (memory block) is allocated (kmem_cache_alloc, kmalloc, vmalloc etc.).
|
||||
* (memory block) is allocated (kmem_cache_alloc, kmalloc etc.).
|
||||
*/
|
||||
void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
|
||||
gfp_t gfp)
|
||||
@ -951,6 +982,36 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
|
||||
|
||||
/**
|
||||
* kmemleak_vmalloc - register a newly vmalloc'ed object
|
||||
* @area: pointer to vm_struct
|
||||
* @size: size of the object
|
||||
* @gfp: __vmalloc() flags used for kmemleak internal memory allocations
|
||||
*
|
||||
* This function is called from the vmalloc() kernel allocator when a new
|
||||
* object (memory block) is allocated.
|
||||
*/
|
||||
void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp)
|
||||
{
|
||||
pr_debug("%s(0x%p, %zu)\n", __func__, area, size);
|
||||
|
||||
/*
|
||||
* A min_count = 2 is needed because vm_struct contains a reference to
|
||||
* the virtual address of the vmalloc'ed block.
|
||||
*/
|
||||
if (kmemleak_enabled) {
|
||||
create_object((unsigned long)area->addr, size, 2, gfp);
|
||||
object_set_excess_ref((unsigned long)area,
|
||||
(unsigned long)area->addr);
|
||||
} else if (kmemleak_early_log) {
|
||||
log_early(KMEMLEAK_ALLOC, area->addr, size, 2);
|
||||
/* reusing early_log.size for storing area->addr */
|
||||
log_early(KMEMLEAK_SET_EXCESS_REF,
|
||||
area, (unsigned long)area->addr, 0);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmemleak_vmalloc);
|
||||
|
||||
/**
|
||||
* kmemleak_free - unregister a previously registered object
|
||||
* @ptr: pointer to beginning of the object
|
||||
@ -1187,6 +1248,30 @@ static bool update_checksum(struct kmemleak_object *object)
|
||||
return object->checksum != old_csum;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update an object's references. object->lock must be held by the caller.
|
||||
*/
|
||||
static void update_refs(struct kmemleak_object *object)
|
||||
{
|
||||
if (!color_white(object)) {
|
||||
/* non-orphan, ignored or new */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Increase the object's reference count (number of pointers to the
|
||||
* memory block). If this count reaches the required minimum, the
|
||||
* object's color will become gray and it will be added to the
|
||||
* gray_list.
|
||||
*/
|
||||
object->count++;
|
||||
if (color_gray(object)) {
|
||||
/* put_object() called when removing from gray_list */
|
||||
WARN_ON(!get_object(object));
|
||||
list_add_tail(&object->gray_list, &gray_list);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory scanning is a long process and it needs to be interruptable. This
|
||||
* function checks whether such interrupt condition occurred.
|
||||
@ -1224,6 +1309,7 @@ static void scan_block(void *_start, void *_end,
|
||||
for (ptr = start; ptr < end; ptr++) {
|
||||
struct kmemleak_object *object;
|
||||
unsigned long pointer;
|
||||
unsigned long excess_ref;
|
||||
|
||||
if (scan_should_stop())
|
||||
break;
|
||||
@ -1259,25 +1345,27 @@ static void scan_block(void *_start, void *_end,
|
||||
* enclosed by scan_mutex.
|
||||
*/
|
||||
spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
if (!color_white(object)) {
|
||||
/* non-orphan, ignored or new */
|
||||
spin_unlock(&object->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Increase the object's reference count (number of pointers
|
||||
* to the memory block). If this count reaches the required
|
||||
* minimum, the object's color will become gray and it will be
|
||||
* added to the gray_list.
|
||||
*/
|
||||
object->count++;
|
||||
/* only pass surplus references (object already gray) */
|
||||
if (color_gray(object)) {
|
||||
/* put_object() called when removing from gray_list */
|
||||
WARN_ON(!get_object(object));
|
||||
list_add_tail(&object->gray_list, &gray_list);
|
||||
excess_ref = object->excess_ref;
|
||||
/* no need for update_refs() if object already gray */
|
||||
} else {
|
||||
excess_ref = 0;
|
||||
update_refs(object);
|
||||
}
|
||||
spin_unlock(&object->lock);
|
||||
|
||||
if (excess_ref) {
|
||||
object = lookup_object(excess_ref, 0);
|
||||
if (!object)
|
||||
continue;
|
||||
if (object == scanned)
|
||||
/* circular reference, ignore */
|
||||
continue;
|
||||
spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
update_refs(object);
|
||||
spin_unlock(&object->lock);
|
||||
}
|
||||
}
|
||||
read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
}
|
||||
@ -1980,6 +2068,10 @@ void __init kmemleak_init(void)
|
||||
case KMEMLEAK_NO_SCAN:
|
||||
kmemleak_no_scan(log->ptr);
|
||||
break;
|
||||
case KMEMLEAK_SET_EXCESS_REF:
|
||||
object_set_excess_ref((unsigned long)log->ptr,
|
||||
log->excess_ref);
|
||||
break;
|
||||
default:
|
||||
kmemleak_warn("Unknown early log operation: %d\n",
|
||||
log->op_type);
|
||||
|
@ -54,9 +54,6 @@ struct memblock memblock __initdata_memblock = {
};

int memblock_debug __initdata_memblock;
#ifdef CONFIG_MOVABLE_NODE
bool movable_node_enabled __initdata_memblock = false;
#endif
static bool system_has_some_mirror __initdata_memblock = false;
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock = 0;
@ -2376,10 +2376,9 @@ void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
|
||||
#ifdef CONFIG_MEMCG_SWAP
|
||||
static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
|
||||
bool charge)
|
||||
int nr_entries)
|
||||
{
|
||||
int val = (charge) ? 1 : -1;
|
||||
this_cpu_add(memcg->stat->count[MEMCG_SWAP], val);
|
||||
this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2405,8 +2404,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
|
||||
new_id = mem_cgroup_id(to);
|
||||
|
||||
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
|
||||
mem_cgroup_swap_statistics(from, false);
|
||||
mem_cgroup_swap_statistics(to, true);
|
||||
mem_cgroup_swap_statistics(from, -1);
|
||||
mem_cgroup_swap_statistics(to, 1);
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
@ -3574,6 +3573,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
|
||||
|
||||
seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
|
||||
seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
|
||||
seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
|
||||
if (!pn)
|
||||
return 1;
|
||||
|
||||
pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
|
||||
if (!pn->lruvec_stat) {
|
||||
kfree(pn);
|
||||
return 1;
|
||||
}
|
||||
|
||||
lruvec_init(&pn->lruvec);
|
||||
pn->usage_in_excess = 0;
|
||||
pn->on_tree = false;
|
||||
@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
|
||||
|
||||
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
|
||||
{
|
||||
kfree(memcg->nodeinfo[node]);
|
||||
struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
|
||||
|
||||
free_percpu(pn->lruvec_stat);
|
||||
kfree(pn);
|
||||
}
|
||||
|
||||
static void __mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
@ -5165,6 +5174,7 @@ static int memory_events_show(struct seq_file *m, void *v)
|
||||
seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
|
||||
seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
|
||||
seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
|
||||
seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -5197,8 +5207,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
|
||||
seq_printf(m, "kernel_stack %llu\n",
|
||||
(u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
|
||||
seq_printf(m, "slab %llu\n",
|
||||
(u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
|
||||
stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
|
||||
(u64)(stat[NR_SLAB_RECLAIMABLE] +
|
||||
stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
|
||||
seq_printf(m, "sock %llu\n",
|
||||
(u64)stat[MEMCG_SOCK] * PAGE_SIZE);
|
||||
|
||||
@ -5222,15 +5232,25 @@ static int memory_stat_show(struct seq_file *m, void *v)
|
||||
}
|
||||
|
||||
seq_printf(m, "slab_reclaimable %llu\n",
|
||||
(u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE);
|
||||
(u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
|
||||
seq_printf(m, "slab_unreclaimable %llu\n",
|
||||
(u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
|
||||
(u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
|
||||
|
||||
/* Accumulated memory events */
|
||||
|
||||
seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
|
||||
seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
|
||||
|
||||
seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
|
||||
seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
|
||||
events[PGSCAN_DIRECT]);
|
||||
seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
|
||||
events[PGSTEAL_DIRECT]);
|
||||
seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
|
||||
seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
|
||||
seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
|
||||
seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
|
||||
|
||||
seq_printf(m, "workingset_refault %lu\n",
|
||||
stat[WORKINGSET_REFAULT]);
|
||||
seq_printf(m, "workingset_activate %lu\n",
|
||||
@ -5445,7 +5465,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
* let's not wait for it. The page already received a
|
||||
* memory+swap charge, drop the swap entry duplicate.
|
||||
*/
|
||||
mem_cgroup_uncharge_swap(entry);
|
||||
mem_cgroup_uncharge_swap(entry, nr_pages);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5873,9 +5893,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
|
||||
* ancestor for the swap instead and transfer the memory+swap charge.
|
||||
*/
|
||||
swap_memcg = mem_cgroup_id_get_online(memcg);
|
||||
oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
|
||||
oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), 1);
|
||||
VM_BUG_ON_PAGE(oldid, page);
|
||||
mem_cgroup_swap_statistics(swap_memcg, true);
|
||||
mem_cgroup_swap_statistics(swap_memcg, 1);
|
||||
|
||||
page->mem_cgroup = NULL;
|
||||
|
||||
@ -5902,19 +5922,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/*
|
||||
* mem_cgroup_try_charge_swap - try charging a swap entry
|
||||
/**
|
||||
* mem_cgroup_try_charge_swap - try charging swap space for a page
|
||||
* @page: page being added to swap
|
||||
* @entry: swap entry to charge
|
||||
*
|
||||
* Try to charge @entry to the memcg that @page belongs to.
|
||||
* Try to charge @page's memcg for the swap space at @entry.
|
||||
*
|
||||
* Returns 0 on success, -ENOMEM on failure.
|
||||
*/
|
||||
int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned int nr_pages = hpage_nr_pages(page);
|
||||
struct page_counter *counter;
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned short oldid;
|
||||
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
|
||||
@ -5929,25 +5950,27 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
|
||||
memcg = mem_cgroup_id_get_online(memcg);
|
||||
|
||||
if (!mem_cgroup_is_root(memcg) &&
|
||||
!page_counter_try_charge(&memcg->swap, 1, &counter)) {
|
||||
!page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
|
||||
mem_cgroup_id_put(memcg);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
|
||||
/* Get references for the tail pages, too */
|
||||
if (nr_pages > 1)
|
||||
mem_cgroup_id_get_many(memcg, nr_pages - 1);
|
||||
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
|
||||
VM_BUG_ON_PAGE(oldid, page);
|
||||
mem_cgroup_swap_statistics(memcg, true);
|
||||
mem_cgroup_swap_statistics(memcg, nr_pages);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_uncharge_swap - uncharge a swap entry
|
||||
* mem_cgroup_uncharge_swap - uncharge swap space
|
||||
* @entry: swap entry to uncharge
|
||||
*
|
||||
* Drop the swap charge associated with @entry.
|
||||
* @nr_pages: the amount of swap space to uncharge
|
||||
*/
|
||||
void mem_cgroup_uncharge_swap(swp_entry_t entry)
|
||||
void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
unsigned short id;
|
||||
@ -5955,18 +5978,18 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
|
||||
if (!do_swap_account)
|
||||
return;
|
||||
|
||||
id = swap_cgroup_record(entry, 0);
|
||||
id = swap_cgroup_record(entry, 0, nr_pages);
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_from_id(id);
|
||||
if (memcg) {
|
||||
if (!mem_cgroup_is_root(memcg)) {
|
||||
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
|
||||
page_counter_uncharge(&memcg->swap, 1);
|
||||
page_counter_uncharge(&memcg->swap, nr_pages);
|
||||
else
|
||||
page_counter_uncharge(&memcg->memsw, 1);
|
||||
page_counter_uncharge(&memcg->memsw, nr_pages);
|
||||
}
|
||||
mem_cgroup_swap_statistics(memcg, false);
|
||||
mem_cgroup_id_put(memcg);
|
||||
mem_cgroup_swap_statistics(memcg, -nr_pages);
|
||||
mem_cgroup_id_put_many(memcg, nr_pages);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -1492,11 +1492,16 @@ EXPORT_SYMBOL(unpoison_memory);
|
||||
static struct page *new_page(struct page *p, unsigned long private, int **x)
|
||||
{
|
||||
int nid = page_to_nid(p);
|
||||
if (PageHuge(p))
|
||||
return alloc_huge_page_node(page_hstate(compound_head(p)),
|
||||
nid);
|
||||
else
|
||||
if (PageHuge(p)) {
|
||||
struct hstate *hstate = page_hstate(compound_head(p));
|
||||
|
||||
if (hstate_is_gigantic(hstate))
|
||||
return alloc_huge_page_node(hstate, NUMA_NO_NODE);
|
||||
|
||||
return alloc_huge_page_node(hstate, nid);
|
||||
} else {
|
||||
return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2719,7 +2719,7 @@ int do_swap_page(struct vm_fault *vmf)
|
||||
/* Had to read the page from swap area: Major fault */
|
||||
ret = VM_FAULT_MAJOR;
|
||||
count_vm_event(PGMAJFAULT);
|
||||
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
|
||||
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
|
||||
} else if (PageHWPoison(page)) {
|
||||
/*
|
||||
* hwpoisoned dirty swapcache pages are kept for killing
|
||||
@ -3837,7 +3837,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
count_vm_event(PGFAULT);
|
||||
mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT);
|
||||
count_memcg_event_mm(vma->vm_mm, PGFAULT);
|
||||
|
||||
/* do counter updates before entering really critical section. */
|
||||
check_sync_rss_stat(current);
|
||||
@ -4014,8 +4014,6 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||
goto out;
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
|
||||
if (!ptep)
|
||||
goto out;
|
||||
if (!pte_present(*ptep))
|
||||
goto unlock;
|
||||
*ptepp = ptep;
|
||||
|
@ -79,6 +79,8 @@ static struct {
|
||||
#define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map)
|
||||
#define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map)
|
||||
|
||||
bool movable_node_enabled = false;
|
||||
|
||||
#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
|
||||
bool memhp_auto_online;
|
||||
#else
|
||||
@ -300,229 +302,38 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
|
||||
}
|
||||
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
|
||||
|
||||
static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long old_zone_end_pfn;
|
||||
|
||||
zone_span_writelock(zone);
|
||||
|
||||
old_zone_end_pfn = zone_end_pfn(zone);
|
||||
if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
|
||||
zone->zone_start_pfn = start_pfn;
|
||||
|
||||
zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
|
||||
zone->zone_start_pfn;
|
||||
|
||||
zone_span_writeunlock(zone);
|
||||
}
|
||||
|
||||
static void resize_zone(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
zone_span_writelock(zone);
|
||||
|
||||
if (end_pfn - start_pfn) {
|
||||
zone->zone_start_pfn = start_pfn;
|
||||
zone->spanned_pages = end_pfn - start_pfn;
|
||||
} else {
|
||||
/*
|
||||
* make it consist as free_area_init_core(),
|
||||
* if spanned_pages = 0, then keep start_pfn = 0
|
||||
*/
|
||||
zone->zone_start_pfn = 0;
|
||||
zone->spanned_pages = 0;
|
||||
}
|
||||
|
||||
zone_span_writeunlock(zone);
|
||||
}
|
||||
|
||||
static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
enum zone_type zid = zone_idx(zone);
|
||||
int nid = zone->zone_pgdat->node_id;
|
||||
unsigned long pfn;
|
||||
|
||||
for (pfn = start_pfn; pfn < end_pfn; pfn++)
|
||||
set_page_links(pfn_to_page(pfn), zid, nid, pfn);
|
||||
}
|
||||
|
||||
/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
|
||||
* alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
|
||||
static int __ref ensure_zone_is_initialized(struct zone *zone,
|
||||
unsigned long start_pfn, unsigned long num_pages)
|
||||
{
|
||||
if (!zone_is_initialized(zone))
|
||||
return init_currently_empty_zone(zone, start_pfn, num_pages);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
|
||||
unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
unsigned long z1_start_pfn;
|
||||
|
||||
ret = ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pgdat_resize_lock(z1->zone_pgdat, &flags);
|
||||
|
||||
/* can't move pfns which are higher than @z2 */
|
||||
if (end_pfn > zone_end_pfn(z2))
|
||||
goto out_fail;
|
||||
/* the move out part must be at the left most of @z2 */
|
||||
if (start_pfn > z2->zone_start_pfn)
|
||||
goto out_fail;
|
||||
/* must included/overlap */
|
||||
if (end_pfn <= z2->zone_start_pfn)
|
||||
goto out_fail;
|
||||
|
||||
/* use start_pfn for z1's start_pfn if z1 is empty */
|
||||
if (!zone_is_empty(z1))
|
||||
z1_start_pfn = z1->zone_start_pfn;
|
||||
else
|
||||
z1_start_pfn = start_pfn;
|
||||
|
||||
resize_zone(z1, z1_start_pfn, end_pfn);
|
||||
resize_zone(z2, end_pfn, zone_end_pfn(z2));
|
||||
|
||||
pgdat_resize_unlock(z1->zone_pgdat, &flags);
|
||||
|
||||
fix_zone_id(z1, start_pfn, end_pfn);
|
||||
|
||||
return 0;
|
||||
out_fail:
|
||||
pgdat_resize_unlock(z1->zone_pgdat, &flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
|
||||
unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
unsigned long z2_end_pfn;
|
||||
|
||||
ret = ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pgdat_resize_lock(z1->zone_pgdat, &flags);
|
||||
|
||||
/* can't move pfns which are lower than @z1 */
|
||||
if (z1->zone_start_pfn > start_pfn)
|
||||
goto out_fail;
|
||||
/* the move out part mast at the right most of @z1 */
|
||||
if (zone_end_pfn(z1) > end_pfn)
|
||||
goto out_fail;
|
||||
/* must included/overlap */
|
||||
if (start_pfn >= zone_end_pfn(z1))
|
||||
goto out_fail;
|
||||
|
||||
/* use end_pfn for z2's end_pfn if z2 is empty */
|
||||
if (!zone_is_empty(z2))
|
||||
z2_end_pfn = zone_end_pfn(z2);
|
||||
else
|
||||
z2_end_pfn = end_pfn;
|
||||
|
||||
resize_zone(z1, z1->zone_start_pfn, start_pfn);
|
||||
resize_zone(z2, start_pfn, z2_end_pfn);
|
||||
|
||||
pgdat_resize_unlock(z1->zone_pgdat, &flags);
|
||||
|
||||
fix_zone_id(z2, start_pfn, end_pfn);
|
||||
|
||||
return 0;
|
||||
out_fail:
|
||||
pgdat_resize_unlock(z1->zone_pgdat, &flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static struct zone * __meminit move_pfn_range(int zone_shift,
|
||||
unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
struct zone *zone = page_zone(pfn_to_page(start_pfn));
|
||||
int ret = 0;
|
||||
|
||||
if (zone_shift < 0)
|
||||
ret = move_pfn_range_left(zone + zone_shift, zone,
|
||||
start_pfn, end_pfn);
|
||||
else if (zone_shift)
|
||||
ret = move_pfn_range_right(zone, zone + zone_shift,
|
||||
start_pfn, end_pfn);
|
||||
|
||||
if (ret)
|
||||
return NULL;
|
||||
|
||||
return zone + zone_shift;
|
||||
}
|
||||
|
||||
static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
|
||||
unsigned long end_pfn)
|
||||
{
|
||||
unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
|
||||
|
||||
if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
|
||||
pgdat->node_start_pfn = start_pfn;
|
||||
|
||||
pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
|
||||
pgdat->node_start_pfn;
|
||||
}
|
||||
|
||||
static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
|
||||
{
|
||||
struct pglist_data *pgdat = zone->zone_pgdat;
|
||||
int nr_pages = PAGES_PER_SECTION;
|
||||
int nid = pgdat->node_id;
|
||||
int zone_type;
|
||||
unsigned long flags, pfn;
|
||||
int ret;
|
||||
|
||||
zone_type = zone - pgdat->node_zones;
|
||||
ret = ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pgdat_resize_lock(zone->zone_pgdat, &flags);
|
||||
grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
|
||||
grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
|
||||
phys_start_pfn + nr_pages);
|
||||
pgdat_resize_unlock(zone->zone_pgdat, &flags);
|
||||
memmap_init_zone(nr_pages, nid, zone_type,
|
||||
phys_start_pfn, MEMMAP_HOTPLUG);
|
||||
|
||||
/* online_page_range is called later and expects pages reserved */
|
||||
for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
|
||||
if (!pfn_valid(pfn))
|
||||
continue;
|
||||
|
||||
SetPageReserved(pfn_to_page(pfn));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __meminit __add_section(int nid, struct zone *zone,
|
||||
unsigned long phys_start_pfn)
|
||||
static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
|
||||
bool want_memblock)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (pfn_valid(phys_start_pfn))
|
||||
return -EEXIST;
|
||||
|
||||
ret = sparse_add_one_section(zone, phys_start_pfn);
|
||||
|
||||
ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = __add_zone(zone, phys_start_pfn);
|
||||
/*
|
||||
* Make all the pages reserved so that nobody will stumble over half
|
||||
* initialized state.
|
||||
* FIXME: We also have to associate it with a node because pfn_to_node
|
||||
* relies on having page with the proper node.
|
||||
*/
|
||||
for (i = 0; i < PAGES_PER_SECTION; i++) {
|
||||
unsigned long pfn = phys_start_pfn + i;
|
||||
struct page *page;
|
||||
if (!pfn_valid(pfn))
|
||||
continue;
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
page = pfn_to_page(pfn);
|
||||
set_page_node(page, nid);
|
||||
SetPageReserved(page);
|
||||
}
|
||||
|
||||
if (!want_memblock)
|
||||
return 0;
|
||||
|
||||
return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
|
||||
}
|
||||
@ -533,16 +344,14 @@ static int __meminit __add_section(int nid, struct zone *zone,
|
||||
* call this function after deciding the zone to which to
|
||||
* add the new pages.
|
||||
*/
|
||||
int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
|
||||
unsigned long nr_pages)
|
||||
int __ref __add_pages(int nid, unsigned long phys_start_pfn,
|
||||
unsigned long nr_pages, bool want_memblock)
|
||||
{
|
||||
unsigned long i;
|
||||
int err = 0;
|
||||
int start_sec, end_sec;
|
||||
struct vmem_altmap *altmap;
|
||||
|
||||
clear_zone_contiguous(zone);
|
||||
|
||||
/* during initialize mem_map, align hot-added range to section */
|
||||
start_sec = pfn_to_section_nr(phys_start_pfn);
|
||||
end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
|
||||
@ -562,7 +371,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
|
||||
}
|
||||
|
||||
for (i = start_sec; i <= end_sec; i++) {
|
||||
err = __add_section(nid, zone, section_nr_to_pfn(i));
|
||||
err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
|
||||
|
||||
/*
|
||||
* EEXIST is finally dealt with by ioresource collision
|
||||
@ -575,7 +384,6 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
|
||||
}
|
||||
vmemmap_populate_print_last();
|
||||
out:
|
||||
set_zone_contiguous(zone);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__add_pages);
|
||||
@ -939,33 +747,20 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
|
||||
unsigned long i;
|
||||
unsigned long onlined_pages = *(unsigned long *)arg;
|
||||
struct page *page;
|
||||
|
||||
if (PageReserved(pfn_to_page(start_pfn)))
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
page = pfn_to_page(start_pfn + i);
|
||||
(*online_page_callback)(page);
|
||||
onlined_pages++;
|
||||
}
|
||||
|
||||
online_mem_sections(start_pfn, start_pfn + nr_pages);
|
||||
|
||||
*(unsigned long *)arg = onlined_pages;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MOVABLE_NODE
|
||||
/*
|
||||
* When CONFIG_MOVABLE_NODE, we permit onlining of a node which doesn't have
|
||||
* normal memory.
|
||||
*/
|
||||
static bool can_online_high_movable(struct zone *zone)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#else /* CONFIG_MOVABLE_NODE */
|
||||
/* ensure every online node has NORMAL memory */
|
||||
static bool can_online_high_movable(struct zone *zone)
|
||||
{
|
||||
return node_state(zone_to_nid(zone), N_NORMAL_MEMORY);
|
||||
}
|
||||
#endif /* CONFIG_MOVABLE_NODE */
|
||||
|
||||
/* check which state of node_states will be changed when online memory */
|
||||
static void node_states_check_changes_online(unsigned long nr_pages,
|
||||
struct zone *zone, struct memory_notify *arg)
|
||||
@@ -1040,39 +835,131 @@ static void node_states_set_node(int node, struct memory_notify *arg)
	node_set_state(node, N_MEMORY);
}

bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
		enum zone_type target, int *zone_shift)
bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type)
{
	struct zone *zone = page_zone(pfn_to_page(pfn));
	enum zone_type idx = zone_idx(zone);
	int i;
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
	struct zone *default_zone = default_zone_for_pfn(nid, pfn, nr_pages);

	*zone_shift = 0;

	if (idx < target) {
		/* pages must be at end of current zone */
		if (pfn + nr_pages != zone_end_pfn(zone))
			return false;

		/* no zones in use between current zone and target */
		for (i = idx + 1; i < target; i++)
			if (zone_is_initialized(zone - idx + i))
				return false;
	/*
	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
	 * physically before ZONE_MOVABLE. All we need is they do not
	 * overlap. Historically we didn't allow ZONE_NORMAL after ZONE_MOVABLE
	 * though so let's stick with it for simplicity for now.
	 * TODO make sure we do not overlap with ZONE_DEVICE
	 */
	if (online_type == MMOP_ONLINE_KERNEL) {
		if (zone_is_empty(movable_zone))
			return true;
		return movable_zone->zone_start_pfn >= pfn + nr_pages;
	} else if (online_type == MMOP_ONLINE_MOVABLE) {
		return zone_end_pfn(default_zone) <= pfn;
	}

	if (target < idx) {
		/* pages must be at beginning of current zone */
		if (pfn != zone->zone_start_pfn)
			return false;
	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
	return online_type == MMOP_ONLINE_KEEP;
}

		/* no zones in use between current zone and target */
		for (i = target + 1; i < idx; i++)
			if (zone_is_initialized(zone - idx + i))
				return false;
static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
		unsigned long nr_pages)
{
	unsigned long old_end_pfn = zone_end_pfn(zone);

	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
		zone->zone_start_pfn = start_pfn;

	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
}

static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
		unsigned long nr_pages)
{
	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);

	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
		pgdat->node_start_pfn = start_pfn;

	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
}

void __ref move_pfn_range_to_zone(struct zone *zone,
		unsigned long start_pfn, unsigned long nr_pages)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nid = pgdat->node_id;
	unsigned long flags;

	if (zone_is_empty(zone))
		init_currently_empty_zone(zone, start_pfn, nr_pages);

	clear_zone_contiguous(zone);

	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
	pgdat_resize_lock(pgdat, &flags);
	zone_span_writelock(zone);
	resize_zone_range(zone, start_pfn, nr_pages);
	zone_span_writeunlock(zone);
	resize_pgdat_range(pgdat, start_pfn, nr_pages);
	pgdat_resize_unlock(pgdat, &flags);

	/*
	 * TODO now we have a visible range of pages which are not associated
	 * with their zone properly. Not nice but set_pfnblock_flags_mask
	 * expects the zone spans the pfn range. All the pages in the range
	 * are reserved so nobody should be touching them so we should be safe
	 */
	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);

	set_zone_contiguous(zone);
}

/*
 * Returns a default kernel memory zone for the given pfn range.
 * If no kernel zone covers this pfn range it will automatically go
 * to the ZONE_NORMAL.
 */
struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
		unsigned long nr_pages)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	int zid;

	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
		struct zone *zone = &pgdat->node_zones[zid];

		if (zone_intersects(zone, start_pfn, nr_pages))
			return zone;
	}

	*zone_shift = target - idx;
	return true;
	return &pgdat->node_zones[ZONE_NORMAL];
}

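default_zone_for_pfn() leans on zone_intersects(), a small helper added elsewhere in this series (in include/linux/mmzone.h, not shown in this file's diff). Roughly, it is just a range-overlap test against the zone's pfn span; a sketch:

static inline bool zone_intersects(struct zone *zone,
		unsigned long start_pfn, unsigned long nr_pages)
{
	if (zone_is_empty(zone))
		return false;
	if (start_pfn >= zone_end_pfn(zone) ||
	    start_pfn + nr_pages <= zone->zone_start_pfn)
		return false;

	return true;
}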
/*
 * Associates the given pfn range with the given node and the zone appropriate
 * for the given online type.
 */
static struct zone * __meminit move_pfn_range(int online_type, int nid,
		unsigned long start_pfn, unsigned long nr_pages)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = default_zone_for_pfn(nid, start_pfn, nr_pages);

	if (online_type == MMOP_ONLINE_KEEP) {
		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
		/*
		 * MMOP_ONLINE_KEEP defaults to MMOP_ONLINE_KERNEL but use
		 * movable zone if that is not possible (e.g. we are within
		 * or past the existing movable zone)
		 */
		if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
					MMOP_ONLINE_KERNEL))
			zone = movable_zone;
	} else if (online_type == MMOP_ONLINE_MOVABLE) {
		zone = &pgdat->node_zones[ZONE_MOVABLE];
	}

	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
	return zone;
}

/* Must be protected by mem_hotplug_begin() */
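The online_type values used above originate from user-space writes to a memory block's sysfs state file; drivers/base/memory.c (not part of this hunk) translates the strings roughly as follows, paraphrased from memory rather than quoted:

	/* in the memory block's state_store() handler */
	if (sysfs_streq(buf, "online_kernel"))
		online_type = MMOP_ONLINE_KERNEL;
	else if (sysfs_streq(buf, "online_movable"))
		online_type = MMOP_ONLINE_MOVABLE;
	else if (sysfs_streq(buf, "online"))
		online_type = MMOP_ONLINE_KEEP;
	else if (sysfs_streq(buf, "offline"))
		online_type = MMOP_OFFLINE;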
@@ -1085,38 +972,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
	int nid;
	int ret;
	struct memory_notify arg;
	int zone_shift = 0;

	/*
	 * This doesn't need a lock to do pfn_to_page().
	 * The section can't be removed here because of the
	 * memory_block->state_mutex.
	 */
	zone = page_zone(pfn_to_page(pfn));

	if ((zone_idx(zone) > ZONE_NORMAL ||
	    online_type == MMOP_ONLINE_MOVABLE) &&
	    !can_online_high_movable(zone))
	nid = pfn_to_nid(pfn);
	if (!allow_online_pfn_range(nid, pfn, nr_pages, online_type))
		return -EINVAL;

	if (online_type == MMOP_ONLINE_KERNEL) {
		if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
			return -EINVAL;
	} else if (online_type == MMOP_ONLINE_MOVABLE) {
		if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
			return -EINVAL;
	}

	zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
	if (!zone)
		return -EINVAL;
	/* associate pfn range with the zone */
	zone = move_pfn_range(online_type, nid, pfn, nr_pages);

	arg.start_pfn = pfn;
	arg.nr_pages = nr_pages;
	node_states_check_changes_online(nr_pages, zone, &arg);

	nid = zone_to_nid(zone);

	ret = memory_notify(MEM_GOING_ONLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
@@ -1311,39 +1178,6 @@ static int check_hotplug_memory_range(u64 start, u64 size)
	return 0;
}

/*
 * If movable zone has already been setup, newly added memory should be check.
 * If its address is higher than movable zone, it should be added as movable.
 * Without this check, movable zone may overlap with other zone.
 */
static int should_add_memory_movable(int nid, u64 start, u64 size)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	pg_data_t *pgdat = NODE_DATA(nid);
	struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;

	if (zone_is_empty(movable_zone))
		return 0;

	if (movable_zone->zone_start_pfn <= start_pfn)
		return 1;

	return 0;
}

int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
		bool for_device)
{
#ifdef CONFIG_ZONE_DEVICE
	if (for_device)
		return ZONE_DEVICE;
#endif
	if (should_add_memory_movable(nid, start, size))
		return ZONE_MOVABLE;

	return zone_default;
}

static int online_memory_block(struct memory_block *mem, void *arg)
{
	return device_online(&mem->dev);
@@ -1389,7 +1223,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size, false);
	ret = arch_add_memory(nid, start, size, true);

	if (ret < 0)
		goto error;
@@ -1398,7 +1232,22 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
	node_set_online(nid);

	if (new_node) {
		ret = register_one_node(nid);
		unsigned long start_pfn = start >> PAGE_SHIFT;
		unsigned long nr_pages = size >> PAGE_SHIFT;

		ret = __register_one_node(nid);
		if (ret)
			goto register_fail;

		/*
		 * link memory sections under this node. This is already
		 * done when creatig memory section in register_new_memory
		 * but that depends to have the node registered so offline
		 * nodes have to go through register_node.
		 * TODO clean up this mess.
		 */
		ret = link_mem_sections(nid, start_pfn, nr_pages);
register_fail:
		/*
		 * If sysfs file of new node can't create, cpu on the node
		 * can't be hot-added. There is no rollback way now.
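When add_memory_resource() is asked to bring the range up immediately (online == true), the tail of the function, which is not visible in this hunk, walks the newly added range and onlines each memory block through the online_memory_block() callback shown above. Roughly:

	/* online pages if the caller requested it */
	if (online)
		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
				  NULL, online_memory_block);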
@@ -1592,11 +1441,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
		gfp_mask |= __GFP_HIGHMEM;

	if (!nodes_empty(nmask))
		new_page = __alloc_pages_nodemask(gfp_mask, 0,
					node_zonelist(nid, gfp_mask), &nmask);
		new_page = __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
	if (!new_page)
		new_page = __alloc_pages(gfp_mask, 0,
					node_zonelist(nid, gfp_mask));
		new_page = __alloc_pages(gfp_mask, 0, nid);

	return new_page;
}
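The two-line change in new_node_page() follows the allocator rework earlier in this series: __alloc_pages_nodemask() and __alloc_pages() now take a preferred node id instead of a zonelist. A sketch of the new prototypes as this series leaves them (details may differ slightly from the exact headers):

struct page *__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
				    int preferred_nid, nodemask_t *nodemask);

static inline struct page *__alloc_pages(gfp_t gfp_mask, unsigned int order,
					 int preferred_nid)
{
	/* same entry point, just without an explicit nodemask */
	return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
}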
@@ -1725,47 +1572,12 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
	return offlined;
}

#ifdef CONFIG_MOVABLE_NODE
/*
 * When CONFIG_MOVABLE_NODE, we permit offlining of a node which doesn't have
 * normal memory.
 */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
	return true;
}
#else /* CONFIG_MOVABLE_NODE */
/* ensure the node has NORMAL memory if it is still online */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	unsigned long present_pages = 0;
	enum zone_type zt;

	for (zt = 0; zt <= ZONE_NORMAL; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;

	if (present_pages > nr_pages)
		return true;

	present_pages = 0;
	for (; zt <= ZONE_MOVABLE; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;

	/*
	 * we can't offline the last normal memory until all
	 * higher memory is offlined.
	 */
	return present_pages == 0;
}
#endif /* CONFIG_MOVABLE_NODE */

static int __init cmdline_parse_movable_node(char *p)
{
#ifdef CONFIG_MOVABLE_NODE
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	movable_node_enabled = true;
#else
	pr_warn("movable_node option not supported\n");
	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
#endif
	return 0;
}
@@ -1887,9 +1699,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
	node = zone_to_nid(zone);
	nr_pages = end_pfn - start_pfn;

	if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
		return -EINVAL;

	/* set above range as isolated */
	ret = start_isolate_page_range(start_pfn, end_pfn,
				       MIGRATE_MOVABLE, true);