linux/arch/ia64/mm/contig.c

209 lines
5.4 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
* Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
* Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
*
* Routines used by ia64 machines with contiguous (or virtually contiguous)
* memory.
*/
#include <linux/efi.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/efi.h>
#include <asm/meminit.h>
#include <asm/sections.h>
#include <asm/mca.h>
/* physical address where the bootmem map is located */
unsigned long bootmap_start;
#ifdef CONFIG_SMP
static void *cpu_data;
/**
* per_cpu_init - setup per-cpu variables
*
* Allocate and setup per-cpu data areas.
*/
void *per_cpu_init(void)
{
static bool first_time = true;
void *cpu0_data = __cpu0_per_cpu;
unsigned int cpu;
if (!first_time)
goto skip;
first_time = false;
/*
* get_free_pages() cannot be used before cpu_init() done.
* BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs
* to avoid that AP calls get_zeroed_page().
*/
for_each_possible_cpu(cpu) {
void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
/*
* percpu area for cpu0 is moved from the __init area
* which is setup by head.S and used till this point.
* Update ar.k3. This move is ensures that percpu
* area for cpu0 is on the correct node and its
* virtual address isn't insanely far from other
* percpu areas which is important for congruent
* percpu allocator.
*/
if (cpu == 0)
ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
(unsigned long)__per_cpu_start);
cpu_data += PERCPU_PAGE_SIZE;
}
skip:
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
static inline void
alloc_per_cpu_data(void)
{
ia64: add checks for the return value of memblock_alloc*() Add panic() calls if memblock_alloc*() returns NULL. Most of the changes are simply addition of if(!ptr) panic(); statements after the calls to memblock_alloc*() variants. Exceptions are create_mem_map_page_table() and ia64_log_init() that were slightly refactored to accommodate the change. Link: http://lkml.kernel.org/r/1548057848-15136-15-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Guo Ren <ren_guo@c-sky.com> [c-sky] Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Juergen Gross <jgross@suse.com> [Xen] Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Burton <paul.burton@mips.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:00 +08:00
size_t size = PERCPU_PAGE_SIZE * num_possible_cpus();
cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE,
memblock: replace __alloc_bootmem with memblock_alloc_from The functions are equivalent, just the later does not require nobootmem translation layer. The conversion is done using the following semantic patch: @@ expression size, align, goal; @@ - __alloc_bootmem(size, align, goal) + memblock_alloc_from(size, align, goal) Link: http://lkml.kernel.org/r/1536927045-23536-21-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-31 06:09:03 +08:00
__pa(MAX_DMA_ADDRESS));
ia64: add checks for the return value of memblock_alloc*() Add panic() calls if memblock_alloc*() returns NULL. Most of the changes are simply addition of if(!ptr) panic(); statements after the calls to memblock_alloc*() variants. Exceptions are create_mem_map_page_table() and ia64_log_init() that were slightly refactored to accommodate the change. Link: http://lkml.kernel.org/r/1548057848-15136-15-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Guo Ren <ren_guo@c-sky.com> [c-sky] Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Juergen Gross <jgross@suse.com> [Xen] Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Burton <paul.burton@mips.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 14:30:00 +08:00
if (!cpu_data)
panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
__func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
}
/**
* setup_per_cpu_areas - setup percpu areas
*
* Arch code has already allocated and initialized percpu areas. All
* this function has to do is to teach the determined layout to the
* dynamic percpu allocator, which happens to be more complex than
* creating whole new ones using helpers.
*/
void __init
setup_per_cpu_areas(void)
{
struct pcpu_alloc_info *ai;
struct pcpu_group_info *gi;
unsigned int cpu;
ssize_t static_size, reserved_size, dyn_size;
ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
if (!ai)
panic("failed to allocate pcpu_alloc_info");
gi = &ai->groups[0];
/* units are assigned consecutively to possible cpus */
for_each_possible_cpu(cpu)
gi->cpu_map[gi->nr_units++] = cpu;
/* set parameters */
static_size = __per_cpu_end - __per_cpu_start;
reserved_size = PERCPU_MODULE_RESERVE;
dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
if (dyn_size < 0)
panic("percpu area overflow static=%zd reserved=%zd\n",
static_size, reserved_size);
ai->static_size = static_size;
ai->reserved_size = reserved_size;
ai->dyn_size = dyn_size;
ai->unit_size = PERCPU_PAGE_SIZE;
ai->atom_size = PAGE_SIZE;
ai->alloc_size = PERCPU_PAGE_SIZE;
pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
pcpu_free_alloc_info(ai);
}
#else
#define alloc_per_cpu_data() do { } while (0)
#endif /* CONFIG_SMP */
/**
* find_memory - setup memory map
*
* Walk the EFI memory map and find usable memory for the system, taking
* into account reserved areas.
*/
void __init
find_memory (void)
{
reserve_memory();
/* first find highest page frame number */
[IA64] min_low_pfn and max_low_pfn calculation fix We have seen bad_pte_print when testing crashdump on an SN machine in recent 2.6.20 kernel. There are tons of bad pte print (pfn < max_low_pfn) reports when the crash kernel boots up, all those reported bad pages are inside initmem range; That is because if the crash kernel code and data happens to be at the beginning of the 1st node. build_node_maps in discontig.c will bypass reserved regions with filter_rsvd_memory. Since min_low_pfn is calculated in build_node_map, so in this case, min_low_pfn will be greater than kernel code and data. Because pages inside initmem are freed and reused later, we saw pfn_valid check fail on those pages. I think this theoretically happen on a normal kernel. When I check min_low_pfn and max_low_pfn calculation in contig.c and discontig.c. I found more issues than this. 1. min_low_pfn and max_low_pfn calculation is inconsistent between contig.c and discontig.c, min_low_pfn is calculated as the first page number of boot memmap in contig.c (Why? Though this may work at the most of the time, I don't think it is the right logic). It is calculated as the lowest physical memory page number bypass reserved regions in discontig.c. max_low_pfn is calculated include reserved regions in contig.c. It is calculated exclude reserved regions in discontig.c. 2. If kernel code and data region is happen to be at the begin or the end of physical memory, when min_low_pfn and max_low_pfn calculation is bypassed kernel code and data, pages in initmem will report bad. 3. initrd is also in reserved regions, if it is at the begin or at the end of physical memory, kernel will refuse to reuse the memory. Because the virt_addr_valid check in free_initrd_mem. So it is better to fix and clean up those issues. Calculate min_low_pfn and max_low_pfn in a consistent way. Signed-off-by: Zou Nan hai <nanhai.zou@intel.com> Acked-by: Jay Lan <jlan@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2007-03-21 04:41:57 +08:00
min_low_pfn = ~0UL;
max_low_pfn = 0;
efi_memmap_walk(find_max_min_low_pfn, NULL);
max_pfn = max_low_pfn;
memblock: allow to specify flags with memblock_add_node() We want to specify flags when hotplugging memory. Let's prepare to pass flags to memblock_add_node() by adjusting all existing users. Note that when hotplugging memory the system is already up and running and we might have concurrent memblock users: for example, while we're hotplugging memory, kexec_file code might search for suitable memory regions to place kexec images. It's important to add the memory directly to memblock via a single call with the right flags, instead of adding the memory first and apply flags later: otherwise, concurrent memblock users might temporarily stumble over memblocks with wrong flags, which will be important in a follow-up patch that introduces a new flag to properly handle add_memory_driver_managed(). Link: https://lkml.kernel.org/r/20211004093605.5830-4-david@redhat.com Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> Acked-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: David Hildenbrand <david@redhat.com> Acked-by: Shahab Vahedi <shahab@synopsys.com> [arch/arc] Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Jianyong Wu <Jianyong.Wu@arm.com> Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vineet Gupta <vgupta@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-11-06 04:44:49 +08:00
memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE);
find_initrd();
alloc_per_cpu_data();
}
static int __init find_largest_hole(u64 start, u64 end, void *arg)
{
u64 *max_gap = arg;
static u64 last_end = PAGE_OFFSET;
/* NOTE: this algorithm assumes efi memmap table is ordered */
if (*max_gap < (start - last_end))
*max_gap = start - last_end;
last_end = end;
return 0;
}
static void __init verify_gap_absence(void)
{
unsigned long max_gap;
/* Forbid FLATMEM if hole is > than 1G */
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap >= SZ_1G)
panic("Cannot use FLATMEM with %ldMB hole\n"
"Please switch over to SPARSEMEM\n",
(max_gap >> 20));
}
/*
* Set up the page tables.
*/
void __init
paging_init (void)
{
unsigned long max_dma;
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
max_zone_pfns[ZONE_DMA32] = max_dma;
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
verify_gap_absence();
mm: use free_area_init() instead of free_area_init_nodes() free_area_init() has effectively became a wrapper for free_area_init_nodes() and there is no point of keeping it. Still free_area_init() name is shorter and more general as it does not imply necessity to initialize multiple nodes. Rename free_area_init_nodes() to free_area_init(), update the callers and drop old version of free_area_init(). Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Hoan Tran <hoan@os.amperecomputing.com> [arm64] Reviewed-by: Baoquan He <bhe@redhat.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Cc: Brian Cain <bcain@codeaurora.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Ungerer <gerg@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <ley.foon.tan@intel.com> Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@kernel.org> Cc: Michal Simek <monstr@monstr.eu> Cc: Nick Hu <nickhu@andestech.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Link: http://lkml.kernel.org/r/20200412194859.12663-6-rppt@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-04 06:57:10 +08:00
free_area_init(max_zone_pfns);
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}