Mirror of https://github.com/edk2-porting/linux-next.git (synced 2024-12-17 01:34:00 +08:00)
mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set
This patch initialises all low memory struct pages and 2G of the highest zone on each node during memory initialisation if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set. That config option cannot be set but will be available in a later patch. Parallel initialisation of struct page depends on some features from memory hotplug and it is necessary to alter section annotations.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent 75a592a471
commit 3a80a7fa79
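Editor's note: before the diff, a sense of scale may help. With CONFIG_DEFERRED_STRUCT_PAGE_INIT set, low zones are initialised in full at boot plus the first 2G of the highest zone on each node; the rest waits for kswapd. A minimal back-of-the-envelope sketch (not part of the commit), assuming 4 KiB pages and a hypothetical 64 GiB highest zone:

/* Back-of-the-envelope model of what the patch defers on one node.
 * Assumptions (not from the patch): 4 KiB pages, a 64 GiB highest zone.
 */
#include <stdio.h>

#define PAGE_SHIFT 12                           /* assumed: 4 KiB pages */

int main(void)
{
        unsigned long zone_pages  = 64UL << (30 - PAGE_SHIFT);  /* 64 GiB zone */
        unsigned long eager_pages = 2UL << (30 - PAGE_SHIFT);   /* the 2G cap */

        printf("highest zone:        %lu pages\n", zone_pages);
        printf("initialised at boot: %lu pages (2 GiB)\n", eager_pages);
        printf("deferred to kswapd:  %lu pages (%lu GiB)\n",
               zone_pages - eager_pages,
               (zone_pages - eager_pages) >> (30 - PAGE_SHIFT));
        return 0;
}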
drivers/base/node.c
@@ -359,12 +359,16 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #define page_initialized(page)  (page->lru.next)
 
-static int get_nid_for_pfn(unsigned long pfn)
+static int __init_refok get_nid_for_pfn(unsigned long pfn)
 {
         struct page *page;
 
         if (!pfn_valid_within(pfn))
                 return -1;
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+        if (system_state == SYSTEM_BOOTING)
+                return early_pfn_to_nid(pfn);
+#endif
         page = pfn_to_page(pfn);
         if (!page_initialized(page))
                 return -1;
include/linux/mmzone.h
@@ -762,6 +762,14 @@ typedef struct pglist_data {
         /* Number of pages migrated during the rate limiting time interval */
         unsigned long numabalancing_migrate_nr_pages;
 #endif
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+        /*
+         * If memory initialisation on large machines is deferred then this
+         * is the first PFN that needs to be initialised.
+         */
+        unsigned long first_deferred_pfn;
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 } pg_data_t;
 
 #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
mm/Kconfig
@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
           changed to a smaller value in which case that is used.
 
           A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+        bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+        bool "Defer initialisation of struct pages to kswapd"
+        default n
+        depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+        depends on MEMORY_HOTPLUG
+        help
+          Ordinarily all struct pages are initialised during early boot in a
+          single thread. On very large machines this can take a considerable
+          amount of time. If this option is set, large machines will bring up
+          a subset of memmap at boot and then initialise the rest in parallel
+          when kswapd starts. This has a potential performance impact on
+          processes running early in the lifetime of the system until kswapd
+          finishes the initialisation.
mm/internal.h
@@ -387,6 +387,24 @@ static inline void mminit_verify_zonelist(void)
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
+/*
+ * Deferred struct page initialisation requires init functions that are freed
+ * before kswapd is available. Reuse the memory hotplug section annotation
+ * to mark the required code.
+ *
+ * __defermem_init is code that always exists but is annotated __meminit to
+ *      avoid section warnings.
+ * __defer_init code gets marked __meminit when deferring struct page
+ *      initialisation but is otherwise in the init section.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define __defermem_init __meminit
+#define __defer_init    __meminit
+#else
+#define __defermem_init
+#define __defer_init __init
+#endif
+
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
 #if defined(CONFIG_SPARSEMEM)
 extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
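Editor's note: the annotations above only steer which linker section a function lands in. A minimal userspace sketch of the dispatch, with the section attributes stubbed out as descriptive strings (the tag text is illustrative, not the kernel's); build with and without -DCONFIG_DEFERRED_STRUCT_PAGE_INIT to see both cases:

/* Userspace sketch: how __defer_init / __defermem_init resolve. The real
 * macros expand to section annotations; here they expand to strings.
 */
#include <stdio.h>

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
#define __defermem_init_tag "__meminit (kept until deferred init finishes)"
#define __defer_init_tag    "__meminit (kept until deferred init finishes)"
#else
#define __defermem_init_tag "(plain text section, always resident)"
#define __defer_init_tag    "__init (freed after boot)"
#endif

int main(void)
{
        printf("__defermem_init -> %s\n", __defermem_init_tag);
        printf("__defer_init    -> %s\n", __defer_init_tag);
        return 0;
}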
mm/page_alloc.c
@@ -235,6 +235,64 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+        pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+        int nid = early_pfn_to_nid(pfn);
+
+        if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+                return true;
+
+        return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+                                unsigned long pfn, unsigned long zone_end,
+                                unsigned long *nr_initialised)
+{
+        /* Always populate low zones for address-constrained allocations */
+        if (zone_end < pgdat_end_pfn(pgdat))
+                return true;
+
+        /* Initialise at least 2G of the highest zone */
+        (*nr_initialised)++;
+        if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+                pgdat->first_deferred_pfn = pfn;
+                return false;
+        }
+
+        return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+        return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+                                unsigned long pfn, unsigned long zone_end,
+                                unsigned long *nr_initialised)
+{
+        return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
         if (unlikely(page_group_by_mobility_disabled &&
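Editor's note: to see where update_defer_init() actually parks first_deferred_pfn, here is a self-contained re-implementation driven over a mock node. PAGE_SHIFT, PAGES_PER_SECTION, the zone span, and struct mock_pgdat are illustrative assumptions (x86_64-like values), not part of the patch:

/* Standalone sketch of update_defer_init() over a mock highest zone.
 * Assumed values (not from the patch): 4 KiB pages, 128 MiB sections,
 * one zone spanning PFNs [0, 16M), i.e. 64 GiB. struct mock_pgdat is a
 * stand-in for pg_data_t.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT              12
#define PAGES_PER_SECTION       (1UL << 15)

struct mock_pgdat {
        unsigned long node_end_pfn;             /* pgdat_end_pfn() stand-in */
        unsigned long first_deferred_pfn;
};

static bool update_defer_init(struct mock_pgdat *pgdat, unsigned long pfn,
                        unsigned long zone_end, unsigned long *nr_initialised)
{
        /* Low zones (those ending before the node does) are never deferred */
        if (zone_end < pgdat->node_end_pfn)
                return true;

        /* Highest zone: eat 2G of pages, then defer at a section boundary */
        (*nr_initialised)++;
        if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
                pgdat->first_deferred_pfn = pfn;
                return false;
        }
        return true;
}

int main(void)
{
        struct mock_pgdat pgdat = {
                .node_end_pfn = 1UL << 24,      /* 64 GiB node, one zone */
                .first_deferred_pfn = ULONG_MAX,
        };
        unsigned long nr_initialised = 0, pfn;

        for (pfn = 0; pfn < pgdat.node_end_pfn; pfn++)
                if (!update_defer_init(&pgdat, pfn, pgdat.node_end_pfn,
                                       &nr_initialised))
                        break;

        /* Expect 524288, i.e. 2 GiB of 4 KiB pages, already section-aligned */
        printf("first_deferred_pfn = %lu\n", pgdat.first_deferred_pfn);
        return 0;
}

Because 2 GiB is a whole number of the assumed 128 MiB sections, deferral starts exactly at the 2 GiB mark here; for a zone whose 2G point is not section-aligned, the cut-off slides forward to the next section boundary.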
@@ -878,8 +936,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
-                                                        unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+                                        unsigned long pfn, unsigned int order)
 {
         unsigned int nr_pages = 1 << order;
         struct page *p = page;
@@ -951,6 +1009,14 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+                                                        unsigned int order)
+{
+        if (early_page_uninitialised(pfn))
+                return;
+        return __free_pages_boot_core(page, pfn, order);
+}
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
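Editor's note: the wrapper above gates every bootmem free; struct pages at or beyond the node's first_deferred_pfn are simply not touched at boot. A toy model of the gate's effect with illustrative numbers (a 4 GiB span cut at the 2 GiB mark; neither value comes from the patch):

/* Toy model of the __free_pages_bootmem() gate: pages past
 * first_deferred_pfn are skipped at boot and left for the deferred pass.
 * All values here are illustrative assumptions.
 */
#include <stdio.h>

static unsigned long first_deferred_pfn = 524288;       /* assumed 2 GiB mark */

static int early_page_uninitialised(unsigned long pfn)
{
        return pfn >= first_deferred_pfn;
}

int main(void)
{
        unsigned long pfn, freed = 0, skipped = 0;

        for (pfn = 0; pfn < 1048576; pfn++) {   /* hypothetical 4 GiB span */
                if (early_page_uninitialised(pfn))
                        skipped++;
                else
                        freed++;
        }
        printf("freed at boot: %lu pages, deferred: %lu pages\n",
               freed, skipped);
        return 0;
}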
@@ -4325,14 +4391,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 unsigned long start_pfn, enum memmap_context context)
 {
+        pg_data_t *pgdat = NODE_DATA(nid);
         unsigned long end_pfn = start_pfn + size;
         unsigned long pfn;
         struct zone *z;
+        unsigned long nr_initialised = 0;
 
         if (highest_memmap_pfn < end_pfn - 1)
                 highest_memmap_pfn = end_pfn - 1;
 
-        z = &NODE_DATA(nid)->node_zones[zone];
+        z = &pgdat->node_zones[zone];
         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                 /*
                  * There can be holes in boot-time mem_map[]s
@@ -4344,6 +4412,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                                 continue;
                         if (!early_pfn_in_nid(pfn, nid))
                                 continue;
+                        if (!update_defer_init(pgdat, pfn, end_pfn,
+                                                &nr_initialised))
+                                break;
                 }
                 __init_single_pfn(pfn, zone, nid);
         }
@@ -5144,6 +5215,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
         /* pg_data_t should be reset to zero when it's allocated */
         WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+        reset_deferred_meminit(pgdat);
         pgdat->node_id = nid;
         pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP