Merge patch series "support allocating crashkernel above 4G explicitly on riscv"

Chen Jiahao <chenjiahao16@huawei.com> says:

On riscv, the current crash kernel allocation logic tries to allocate
within the 32-bit addressable memory region by default and, if that
fails, tries again without the 4G restriction.

To preserve DMA zone memory when allocating a relatively large crash
kernel region, a mature solution is to allocate the reserved memory
top-down in high memory, without overlapping the DMA zone.
Hence this patchset introduces the parameter option crashkernel=X,[high,low].

One can reserve the crash kernel from high memory above DMA zone range
by explicitly passing "crashkernel=X,high"; or reserve a memory range
below 4G with "crashkernel=X,low". In addition, there are a few rules
to be aware of:
1. "crashkernel=X,[high,low]" will be ignored if "crashkernel=size"
   is specified.
2. "crashkernel=X,low" is valid only when "crashkernel=X,high" is passed
   and there is enough memory to be allocated under 4G.
3. When allocating crashkernel above 4G and no "crashkernel=X,low" is
   specified, 128M of low memory will be allocated automatically for
   the swiotlb bounce buffer.
See Documentation/admin-guide/kernel-parameters.txt for more information.

To verify loading the crashkernel, an adapted kexec-tools is available at:
https://github.com/chenjh005/kexec-tools/tree/build-test-riscv-v2

The following test cases have been run and behave as expected:
1) crashkernel=256M                          //low=256M
2) crashkernel=1G                            //low=1G
3) crashkernel=4G                            //high=4G, low=128M(default)
4) crashkernel=4G crashkernel=256M,high      //high=4G, low=128M(default), high is ignored
5) crashkernel=4G crashkernel=256M,low       //high=4G, low=128M(default), low is ignored
6) crashkernel=4G,high                       //high=4G, low=128M(default)
7) crashkernel=256M,low                      //low=0M, invalid
8) crashkernel=4G,high crashkernel=256M,low  //high=4G, low=256M
9) crashkernel=4G,high crashkernel=4G,low    //high=0M, low=0M, invalid
10) crashkernel=512M@0xd0000000              //low=512M
11) crashkernel=1G,high crashkernel=0M,low   //high=1G, low=0M

* b4-shazam-merge:
  docs: kdump: Update the crashkernel description for riscv
  riscv: kdump: Implement crashkernel=X,[high,low]

Link: https://lore.kernel.org/r/20230726175000.2536220-1-chenjiahao16@huawei.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Palmer Dabbelt 2023-08-16 07:51:53 -07:00
commit 9389e6715f
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
3 changed files with 99 additions and 14 deletions

View File

@ -862,7 +862,7 @@
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
is selected automatically.
[KNL, X86-64, ARM64] Select a region under 4G first, and
[KNL, X86-64, ARM64, RISCV] Select a region under 4G first, and
fall back to reserve region above 4G when '@offset'
hasn't been specified.
See Documentation/admin-guide/kdump/kdump.rst for further details.
@ -875,14 +875,14 @@
Documentation/admin-guide/kdump/kdump.rst for an example.
crashkernel=size[KMG],high
[KNL, X86-64, ARM64] range could be above 4G. Allow kernel
to allocate physical memory region from top, so could
be above 4G if system have more than 4G ram installed.
Otherwise memory region will be allocated below 4G, if
available.
[KNL, X86-64, ARM64, RISCV] range could be above 4G.
Allow kernel to allocate physical memory region from top,
so could be above 4G if system have more than 4G ram
installed. Otherwise memory region will be allocated
below 4G, if available.
It will be ignored if crashkernel=X is specified.
crashkernel=size[KMG],low
[KNL, X86-64, ARM64] range under 4G. When crashkernel=X,high
[KNL, X86-64, ARM64, RISCV] range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
@ -893,6 +893,7 @@
size is platform dependent.
--> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB)
--> arm64: 128MiB
--> riscv: 128MiB
This one lets the user specify own low range under 4G
for second kernel instead.
0: to disable low allocation.

View File

@ -178,6 +178,11 @@ static void __init init_resources(void)
if (ret < 0)
goto error;
}
if (crashk_low_res.start != crashk_low_res.end) {
ret = add_resource(&iomem_resource, &crashk_low_res);
if (ret < 0)
goto error;
}
#endif
#ifdef CONFIG_CRASH_DUMP

View File

@ -1297,6 +1297,28 @@ static inline void setup_vm_final(void)
}
#endif /* CONFIG_MMU */
/* Reserve 128M low memory by default for swiotlb buffer */
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
/*
 * reserve_crashkernel_low() - reserve the low part of the crash kernel memory
 * @low_size: number of bytes to reserve
 *
 * Asks memblock_phys_alloc_range() for @low_size bytes, PMD_SIZE aligned,
 * using 0 and dma32_phys_limit as the bounds of the search window. On
 * success the region is logged and recorded in crashk_low_res.
 *
 * Return: 0 on success, -ENOMEM if the allocation returned 0.
 */
static int __init reserve_crashkernel_low(unsigned long long low_size)
{
	unsigned long long base = memblock_phys_alloc_range(low_size, PMD_SIZE, 0,
							    dma32_phys_limit);

	if (base) {
		pr_info("crashkernel low memory reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
			base, base + low_size, low_size >> 20);

		/* The resource end is inclusive, hence the "- 1". */
		crashk_low_res.start = base;
		crashk_low_res.end = base + low_size - 1;

		return 0;
	}

	pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);

	return -ENOMEM;
}
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
@ -1308,8 +1330,12 @@ static void __init reserve_crashkernel(void)
{
unsigned long long crash_base = 0;
unsigned long long crash_size = 0;
unsigned long long crash_low_size = 0;
unsigned long search_start = memblock_start_of_DRAM();
unsigned long search_end = memblock_end_of_DRAM();
unsigned long search_end = (unsigned long)dma32_phys_limit;
char *cmdline = boot_command_line;
bool fixed_base = false;
bool high = false;
int ret = 0;
@ -1325,14 +1351,36 @@ static void __init reserve_crashkernel(void)
return;
}
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
&crash_size, &crash_base);
if (ret || !crash_size)
if (ret == -ENOENT) {
/* Fallback to crashkernel=X,[high,low] */
ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
if (ret || !crash_size)
return;
/*
* crashkernel=Y,low is valid only when crashkernel=X,high
* is passed.
*/
ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
if (ret == -ENOENT)
crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
else if (ret)
return;
search_start = (unsigned long)dma32_phys_limit;
search_end = memblock_end_of_DRAM();
high = true;
} else if (ret || !crash_size) {
/* Invalid argument value specified */
return;
}
crash_size = PAGE_ALIGN(crash_size);
if (crash_base) {
fixed_base = true;
search_start = crash_base;
search_end = crash_base + crash_size;
}
@ -1345,12 +1393,37 @@ static void __init reserve_crashkernel(void)
* swiotlb can work on the crash kernel.
*/
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
search_start,
min(search_end, (unsigned long) SZ_4G));
search_start, search_end);
if (crash_base == 0) {
/* Try again without restricting region to 32bit addressible memory */
/*
* For crashkernel=size[KMG]@offset[KMG], print out failure
* message if can't reserve the specified region.
*/
if (fixed_base) {
pr_warn("crashkernel: allocating failed with given size@offset\n");
return;
}
if (high) {
/*
* For crashkernel=size[KMG],high, if the first attempt was
* for high memory, fall back to low memory.
*/
search_start = memblock_start_of_DRAM();
search_end = (unsigned long)dma32_phys_limit;
} else {
/*
* For crashkernel=size[KMG], if the first attempt was for
* low memory, fall back to high memory, the minimum required
* low memory will be reserved later.
*/
search_start = (unsigned long)dma32_phys_limit;
search_end = memblock_end_of_DRAM();
crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
}
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
search_start, search_end);
search_start, search_end);
if (crash_base == 0) {
pr_warn("crashkernel: couldn't allocate %lldKB\n",
crash_size >> 10);
@ -1358,6 +1431,12 @@ static void __init reserve_crashkernel(void)
}
}
if ((crash_base >= dma32_phys_limit) && crash_low_size &&
reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
return;
}
pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
crash_base, crash_base + crash_size, crash_size >> 20);