diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 70620d0dd923..974a565797d8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -961,7 +961,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z node_states_set_node(nid, &arg); if (need_zonelists_rebuild) build_all_zonelists(NULL); - zone_pcp_update(zone); /* Basic onlining is complete, allow allocation of onlined pages. */ undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE); @@ -974,6 +973,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z */ shuffle_zone(zone); + /* reinitialise watermarks and update pcp limits */ init_per_zone_wmark_min(); kswapd_run(nid); @@ -1829,13 +1829,13 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); adjust_present_page_count(zone, -nr_pages); + /* reinitialise watermarks and update pcp limits */ init_per_zone_wmark_min(); if (!populated_zone(zone)) { zone_pcp_reset(zone); build_all_zonelists(NULL); - } else - zone_pcp_update(zone); + } node_states_clear_node(node, &arg); if (arg.status_change_nid >= 0) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5abf2c1d4c58..19ec81d403a0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2174,14 +2174,6 @@ void __init page_alloc_init_late(void) /* Block until all are initialised */ wait_for_completion(&pgdat_init_all_done_comp); - /* - * The number of managed pages has changed due to the initialisation - * so the pcpu batch and high limits needs to be updated or the limits - * will be artificially small. - */ - for_each_populated_zone(zone) - zone_pcp_update(zone); - /* * We initialized the rest of the deferred pages. Permanently disable * on-demand struct page initialization. @@ -6633,13 +6625,12 @@ static int zone_batchsize(struct zone *zone) int batch; /* - * The per-cpu-pages pools are set to around 1000th of the - * size of the zone. + * The number of pages to batch allocate is either ~0.1% + * of the zone or 1MB, whichever is smaller. The batch + * size is striking a balance between allocation latency + * and zone lock contention. */ - batch = zone_managed_pages(zone) / 1024; - /* But no more than a meg. */ - if (batch * PAGE_SIZE > 1024 * 1024) - batch = (1024 * 1024) / PAGE_SIZE; + batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE); batch /= 4; /* We effectively *= 4 below */ if (batch < 1) batch = 1; @@ -6676,6 +6667,34 @@ static int zone_batchsize(struct zone *zone) #endif } +static int zone_highsize(struct zone *zone, int batch) +{ +#ifdef CONFIG_MMU + int high; + int nr_local_cpus; + + /* + * The high value of the pcp is based on the zone low watermark + * so that if they are full then background reclaim will not be + * started prematurely. The value is split across all online CPUs + * local to the zone. Note that early in boot that CPUs may not be + * online yet. + */ + nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))); + high = low_wmark_pages(zone) / nr_local_cpus; + + /* + * Ensure high is at least batch*4. The multiple is based on the + * historical relationship between high and batch. + */ + high = max(high, batch << 2); + + return high; +#else + return 0; +#endif +} + /* * pcp->high and pcp->batch values are related and generally batch is lower * than high. They are also related to pcp->count such that count is lower @@ -6737,11 +6756,10 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h */ static void zone_set_pageset_high_and_batch(struct zone *zone) { - unsigned long new_high, new_batch; + int new_high, new_batch; - new_batch = zone_batchsize(zone); - new_high = 6 * new_batch; - new_batch = max(1UL, 1 * new_batch); + new_batch = max(1, zone_batchsize(zone)); + new_high = zone_highsize(zone, new_batch); if (zone->pageset_high == new_high && zone->pageset_batch == new_batch) @@ -8222,11 +8240,19 @@ static void __setup_per_zone_wmarks(void) */ void setup_per_zone_wmarks(void) { + struct zone *zone; static DEFINE_SPINLOCK(lock); spin_lock(&lock); __setup_per_zone_wmarks(); spin_unlock(&lock); + + /* + * The watermark size have changed so update the pcpu batch + * and high limits or the limits may be inappropriate. + */ + for_each_zone(zone) + zone_pcp_update(zone); } /*