2019-05-27 14:55:17 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
|
|
|
|
* are not related to any other subsystem
|
|
|
|
*
|
|
|
|
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
|
|
|
|
*/
|
|
|
|
|
2022-11-03 23:24:07 +08:00
|
|
|
#include <asm/byteorder.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/kobject.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/sysfs.h>
|
2011-05-24 02:51:41 +08:00
|
|
|
#include <linux/export.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/init.h>
|
2006-06-23 17:05:07 +08:00
|
|
|
#include <linux/kexec.h>
|
2008-10-16 13:01:46 +08:00
|
|
|
#include <linux/profile.h>
|
2011-07-29 02:22:29 +08:00
|
|
|
#include <linux/stat.h>
|
2007-10-15 23:00:14 +08:00
|
|
|
#include <linux/sched.h>
|
2011-02-28 22:57:17 +08:00
|
|
|
#include <linux/capability.h>
|
2014-04-08 06:39:20 +08:00
|
|
|
#include <linux/compiler.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2015-11-25 07:44:06 +08:00
|
|
|
#include <linux/rcupdate.h> /* rcu_expedited and rcu_normal */
|
2014-02-12 05:10:12 +08:00
|
|
|
|
2022-11-03 23:24:07 +08:00
|
|
|
#if defined(__LITTLE_ENDIAN)
|
|
|
|
#define CPU_BYTEORDER_STRING "little"
|
|
|
|
#elif defined(__BIG_ENDIAN)
|
|
|
|
#define CPU_BYTEORDER_STRING "big"
|
|
|
|
#else
|
|
|
|
#error Unknown byteorder
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Define a file-local kobj_attribute called <_name>_attr, initialised
 * through __ATTR_RO(_name).
 */
#define KERNEL_ATTR_RO(_name)						\
	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * Define a file-local kobj_attribute called <_name>_attr, initialised
 * through __ATTR_RW(_name).
 */
#define KERNEL_ATTR_RW(_name)						\
	static struct kobj_attribute _name##_attr = __ATTR_RW(_name)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-11-11 11:58:04 +08:00
|
|
|
/* current uevent sequence number */
|
2007-11-02 20:47:53 +08:00
|
|
|
static ssize_t uevent_seqnum_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2024-02-14 16:48:28 +08:00
|
|
|
return sysfs_emit(buf, "%llu\n", (u64)atomic64_read(&uevent_seqnum));
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2005-11-11 11:58:04 +08:00
|
|
|
KERNEL_ATTR_RO(uevent_seqnum);
|
|
|
|
|
2022-11-03 23:24:07 +08:00
|
|
|
/* cpu byteorder */
|
|
|
|
static ssize_t cpu_byteorder_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
return sysfs_emit(buf, "%s\n", CPU_BYTEORDER_STRING);
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RO(cpu_byteorder);
|
|
|
|
|
2022-12-22 00:17:52 +08:00
|
|
|
/* address bits */
|
|
|
|
static ssize_t address_bits_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
|
|
|
return sysfs_emit(buf, "%zu\n", sizeof(void *) * 8 /* CHAR_BIT */);
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RO(address_bits);
|
|
|
|
|
2014-04-11 05:09:31 +08:00
|
|
|
#ifdef CONFIG_UEVENT_HELPER
|
2010-01-18 05:14:26 +08:00
|
|
|
/* uevent helper program, used during early boot */
|
2007-11-02 20:47:53 +08:00
|
|
|
static ssize_t uevent_helper_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
2005-11-11 11:58:04 +08:00
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%s\n", uevent_helper);
|
2005-11-11 11:58:04 +08:00
|
|
|
}
|
2007-11-02 20:47:53 +08:00
|
|
|
static ssize_t uevent_helper_store(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
2005-11-11 11:58:04 +08:00
|
|
|
{
|
2005-11-16 16:00:00 +08:00
|
|
|
if (count+1 > UEVENT_HELPER_PATH_LEN)
|
2005-11-11 11:58:04 +08:00
|
|
|
return -ENOENT;
|
2007-11-02 20:47:53 +08:00
|
|
|
memcpy(uevent_helper, buf, count);
|
2005-11-16 16:00:00 +08:00
|
|
|
uevent_helper[count] = '\0';
|
|
|
|
if (count && uevent_helper[count-1] == '\n')
|
|
|
|
uevent_helper[count-1] = '\0';
|
2005-11-11 11:58:04 +08:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RW(uevent_helper);
|
2014-04-11 05:09:31 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-10-16 13:01:46 +08:00
|
|
|
#ifdef CONFIG_PROFILING
|
|
|
|
static ssize_t profiling_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%d\n", prof_on);
|
2008-10-16 13:01:46 +08:00
|
|
|
}
|
|
|
|
static ssize_t profiling_store(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
profiling: remove prof_cpu_mask
syzbot is reporting uninit-value at profile_hits(), for there is a race
window between
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(prof_cpu_mask, cpu_possible_mask);
in profile_init() and
cpumask_available(prof_cpu_mask) &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
in profile_tick(); prof_cpu_mask remains uninitialzed until cpumask_copy()
completes while cpumask_available(prof_cpu_mask) returns true as soon as
alloc_cpumask_var(&prof_cpu_mask) completes.
We could replace alloc_cpumask_var() with zalloc_cpumask_var() and
call cpumask_copy() from create_proc_profile() on only UP kernels, for
profile_online_cpu() calls cpumask_set_cpu() as needed via
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN) on SMP kernels. But this patch
removes prof_cpu_mask because it seems unnecessary.
The cpumask_test_cpu(smp_processor_id(), prof_cpu_mask) test
in profile_tick() is likely always true due to
a CPU cannot call profile_tick() if that CPU is offline
and
cpumask_set_cpu(cpu, prof_cpu_mask) is called when that CPU becomes
online and cpumask_clear_cpu(cpu, prof_cpu_mask) is called when that
CPU becomes offline
. This test could be false during transition between online and offline.
But according to include/linux/cpuhotplug.h , CPUHP_PROFILE_PREPARE
belongs to PREPARE section, which means that the CPU subjected to
profile_dead_cpu() cannot be inside profile_tick() (i.e. no risk of
use-after-free bug) because interrupt for that CPU is disabled during
PREPARE section. Therefore, this test is guaranteed to be true, and
can be removed. (Since profile_hits() checks prof_buffer != NULL, we
don't need to check prof_buffer != NULL here unless get_irq_regs() or
user_mode() is such slow that we want to avoid when prof_buffer == NULL).
do_profile_hits() is called from profile_tick() from timer interrupt
only if cpumask_test_cpu(smp_processor_id(), prof_cpu_mask) is true and
prof_buffer is not NULL. But syzbot is also reporting that sometimes
do_profile_hits() is called while current thread is still doing vzalloc(),
where prof_buffer must be NULL at this moment. This indicates that multiple
threads concurrently tried to write to /sys/kernel/profiling interface,
which caused that somebody else try to re-allocate prof_buffer despite
somebody has already allocated prof_buffer. Fix this by using
serialization.
Reported-by: syzbot <syzbot+b1a83ab2a9eb9321fbdd@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=b1a83ab2a9eb9321fbdd
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+b1a83ab2a9eb9321fbdd@syzkaller.appspotmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-07-27 18:59:57 +08:00
|
|
|
static DEFINE_MUTEX(lock);
|
2008-10-16 13:01:46 +08:00
|
|
|
|
profiling: remove prof_cpu_mask
syzbot is reporting uninit-value at profile_hits(), for there is a race
window between
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;
cpumask_copy(prof_cpu_mask, cpu_possible_mask);
in profile_init() and
cpumask_available(prof_cpu_mask) &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
in profile_tick(); prof_cpu_mask remains uninitialzed until cpumask_copy()
completes while cpumask_available(prof_cpu_mask) returns true as soon as
alloc_cpumask_var(&prof_cpu_mask) completes.
We could replace alloc_cpumask_var() with zalloc_cpumask_var() and
call cpumask_copy() from create_proc_profile() on only UP kernels, for
profile_online_cpu() calls cpumask_set_cpu() as needed via
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN) on SMP kernels. But this patch
removes prof_cpu_mask because it seems unnecessary.
The cpumask_test_cpu(smp_processor_id(), prof_cpu_mask) test
in profile_tick() is likely always true due to
a CPU cannot call profile_tick() if that CPU is offline
and
cpumask_set_cpu(cpu, prof_cpu_mask) is called when that CPU becomes
online and cpumask_clear_cpu(cpu, prof_cpu_mask) is called when that
CPU becomes offline
. This test could be false during transition between online and offline.
But according to include/linux/cpuhotplug.h , CPUHP_PROFILE_PREPARE
belongs to PREPARE section, which means that the CPU subjected to
profile_dead_cpu() cannot be inside profile_tick() (i.e. no risk of
use-after-free bug) because interrupt for that CPU is disabled during
PREPARE section. Therefore, this test is guaranteed to be true, and
can be removed. (Since profile_hits() checks prof_buffer != NULL, we
don't need to check prof_buffer != NULL here unless get_irq_regs() or
user_mode() is such slow that we want to avoid when prof_buffer == NULL).
do_profile_hits() is called from profile_tick() from timer interrupt
only if cpumask_test_cpu(smp_processor_id(), prof_cpu_mask) is true and
prof_buffer is not NULL. But syzbot is also reporting that sometimes
do_profile_hits() is called while current thread is still doing vzalloc(),
where prof_buffer must be NULL at this moment. This indicates that multiple
threads concurrently tried to write to /sys/kernel/profiling interface,
which caused that somebody else try to re-allocate prof_buffer despite
somebody has already allocated prof_buffer. Fix this by using
serialization.
Reported-by: syzbot <syzbot+b1a83ab2a9eb9321fbdd@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=b1a83ab2a9eb9321fbdd
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+b1a83ab2a9eb9321fbdd@syzkaller.appspotmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-07-27 18:59:57 +08:00
|
|
|
/*
|
|
|
|
* We need serialization, for profile_setup() initializes prof_on
|
|
|
|
* value and profile_init() must not reallocate prof_buffer after
|
|
|
|
* once allocated.
|
|
|
|
*/
|
|
|
|
guard(mutex)(&lock);
|
2008-10-16 13:01:46 +08:00
|
|
|
if (prof_on)
|
|
|
|
return -EEXIST;
|
|
|
|
/*
|
|
|
|
* This eventually calls into get_option() which
|
|
|
|
* has a ton of callers and is not const. It is
|
|
|
|
* easiest to cast it away here.
|
|
|
|
*/
|
|
|
|
profile_setup((char *)buf);
|
|
|
|
ret = profile_init();
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
ret = create_proc_profile();
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RW(profiling);
|
|
|
|
#endif
|
|
|
|
|
2015-09-10 06:38:55 +08:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2007-11-02 20:47:53 +08:00
|
|
|
static ssize_t kexec_loaded_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
2006-06-23 17:05:07 +08:00
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%d\n", !!kexec_image);
|
2006-06-23 17:05:07 +08:00
|
|
|
}
|
|
|
|
KERNEL_ATTR_RO(kexec_loaded);
|
|
|
|
|
2024-01-24 13:12:44 +08:00
|
|
|
#ifdef CONFIG_CRASH_DUMP
|
2007-11-02 20:47:53 +08:00
|
|
|
/* Show the value returned by kexec_crash_loaded() as a decimal integer. */
static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
				       struct kobj_attribute *attr, char *buf)
{
	int loaded = kexec_crash_loaded();

	return sysfs_emit(buf, "%d\n", loaded);
}
KERNEL_ATTR_RO(kexec_crash_loaded);
|
2007-10-17 14:27:27 +08:00
|
|
|
|
2009-12-16 08:47:46 +08:00
|
|
|
/*
 * Show the value returned by crash_get_memory_size().  A negative
 * return from that helper is passed straight back to the caller as
 * the error code.
 */
static ssize_t kexec_crash_size_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	ssize_t mem_size = crash_get_memory_size();

	return mem_size < 0 ? mem_size : sysfs_emit(buf, "%zd\n", mem_size);
}
|
|
|
|
static ssize_t kexec_crash_size_store(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
unsigned long cnt;
|
|
|
|
int ret;
|
|
|
|
|
2013-09-13 06:14:07 +08:00
|
|
|
if (kstrtoul(buf, 0, &cnt))
|
2009-12-16 08:47:46 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
ret = crash_shrink_memory(cnt);
|
|
|
|
return ret < 0 ? ret : count;
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RW(kexec_crash_size);
|
|
|
|
|
2024-01-24 13:12:44 +08:00
|
|
|
#endif /* CONFIG_CRASH_DUMP*/
|
crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE
Patch series "kexec/fadump: remove dependency with CONFIG_KEXEC and
reuse crashkernel parameter for fadump", v4.
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.
This patchset removes dependency with CONFIG_KEXEC for crashkernel
parameter and vmcoreinfo related code as it can be reused without kexec
support. Also, crashkernel parameter is reused instead of
fadump_reserve_mem to reserve memory for fadump.
The first patch moves crashkernel parameter parsing and vmcoreinfo
related code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The
second patch reuses the definitions of append_elf_note() & final_note()
functions under CONFIG_CRASH_CORE in IA64 arch code. The third patch
removes dependency on CONFIG_KEXEC for firmware-assisted dump (fadump)
in powerpc. The next patch reuses crashkernel parameter for reserving
memory for fadump, instead of the fadump_reserve_mem parameter. This
has the advantage of using all syntaxes crashkernel parameter supports,
for fadump as well. The last patch updates fadump kernel documentation
about use of crashkernel parameter.
This patch (of 5):
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.
But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. There is no
functional change with this patch.
Link: http://lkml.kernel.org/r/149035338104.6881.4550894432615189948.stgit@hbathini.in.ibm.com
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-09 06:56:18 +08:00
|
|
|
#endif /* CONFIG_KEXEC_CORE */
|
|
|
|
|
2024-01-24 13:12:42 +08:00
|
|
|
#ifdef CONFIG_VMCORE_INFO
|
crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE
Patch series "kexec/fadump: remove dependency with CONFIG_KEXEC and
reuse crashkernel parameter for fadump", v4.
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be a reused, for memory reservation, by such
architecture specific infrastructure.
This patchset removes dependency with CONFIG_KEXEC for crashkernel
parameter and vmcoreinfo related code as it can be reused without kexec
support. Also, crashkernel parameter is reused instead of
fadump_reserve_mem to reserve memory for fadump.
The first patch moves crashkernel parameter parsing and vmcoreinfo
related code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The
second patch reuses the definitions of append_elf_note() & final_note()
functions under CONFIG_CRASH_CORE in IA64 arch code. The third patch
removes dependency on CONFIG_KEXEC for firmware-assisted dump (fadump)
in powerpc. The next patch reuses crashkernel parameter for reserving
memory for fadump, instead of the fadump_reserve_mem parameter. This
has the advantage of using all syntaxes crashkernel parameter supports,
for fadump as well. The last patch updates fadump kernel documentation
about use of crashkernel parameter.
This patch (of 5):
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be a reused, for memory reservation, by such
architecture specific infrastructure.
But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. There is no
functional change with this patch.
Link: http://lkml.kernel.org/r/149035338104.6881.4550894432615189948.stgit@hbathini.in.ibm.com
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-09 06:56:18 +08:00
|
|
|
|
2007-11-02 20:47:53 +08:00
|
|
|
static ssize_t vmcoreinfo_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
2007-10-17 14:27:27 +08:00
|
|
|
{
|
2016-08-03 05:06:00 +08:00
|
|
|
phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%pa %x\n", &vmcore_base,
|
|
|
|
(unsigned int)VMCOREINFO_NOTE_SIZE);
|
2007-10-17 14:27:27 +08:00
|
|
|
}
|
|
|
|
KERNEL_ATTR_RO(vmcoreinfo);
|
|
|
|
|
crash: hotplug support for kexec_load()
The hotplug support for kexec_load() requires changes to the userspace
kexec-tools and a little extra help from the kernel.
Given a kdump capture kernel loaded via kexec_load(), and a subsequent
hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites
it to reflect the hotplug change. That is the desired outcome, however,
at kernel panic time, the purgatory integrity check fails (because the
elfcorehdr changed), and the capture kernel does not boot and no vmcore is
generated.
Therefore, the userspace kexec-tools/kexec must indicate to the kernel
that the elfcorehdr can be modified (because the kexec excluded the
elfcorehdr from the digest, and sized the elfcorehdr memory buffer
appropriately).
To facilitate hotplug support with kexec_load():
- a new kexec flag KEXEC_UPDATE_ELFCOREHDR indicates that it is
safe for the kernel to modify the kexec_load()'d elfcorehdr
- the /sys/kernel/crash_elfcorehdr_size node communicates the
preferred size of the elfcorehdr memory buffer
- The sysfs crash_hotplug nodes (ie.
/sys/devices/system/[cpu|memory]/crash_hotplug) dynamically
take into account kexec_file_load() vs kexec_load() and
KEXEC_UPDATE_ELFCOREHDR.
This is critical so that the udev rule processing of crash_hotplug
is all that is needed to determine if the userspace unload-then-load
of the kdump image is to be skipped, or not. The proposed udev
rule change looks like:
# The kernel updates the crash elfcorehdr for CPU and memory changes
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
The table below indicates the behavior of kexec_load()'d kdump image
updates (with the new udev crash_hotplug rule in place):
Kernel |Kexec
-------+-----+----
Old |Old |New
| a | a
-------+-----+----
New | a | b
-------+-----+----
where kexec 'old' and 'new' delineate kexec-tools has the needed
modifications for the crash hotplug feature, and kernel 'old' and 'new'
delineate the kernel supports this crash hotplug feature.
Behavior 'a' indicates the unload-then-reload of the entire kdump image.
For the kexec 'old' column, the unload-then-reload occurs due to the
missing flag KEXEC_UPDATE_ELFCOREHDR. An 'old' kernel (with 'new' kexec)
does not present the crash_hotplug sysfs node, which leads to the
unload-then-reload of the kdump image.
Behavior 'b' indicates the desired optimized behavior of the kernel
directly modifying the elfcorehdr and avoiding the unload-then-reload of
the kdump image.
If the udev rule is not updated with crash_hotplug node check, then no
matter any combination of kernel or kexec is new or old, the kdump image
continues to be unload-then-reload on hotplug changes.
To fully support crash hotplug feature, there needs to be a rollout of
kernel, kexec-tools and udev rule changes. However, the order of the
rollout of these pieces does not matter; kexec_load()'d kdump images still
function for hotplug as-is.
Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Suggested-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-08-15 05:44:44 +08:00
|
|
|
#ifdef CONFIG_CRASH_HOTPLUG
|
|
|
|
/* Show the value returned by crash_get_elfcorehdr_size() in decimal. */
static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  char *buf)
{
	const unsigned int hdr_size = crash_get_elfcorehdr_size();

	return sysfs_emit(buf, "%u\n", hdr_size);
}
KERNEL_ATTR_RO(crash_elfcorehdr_size);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2024-01-24 13:12:42 +08:00
|
|
|
#endif /* CONFIG_VMCORE_INFO */
|
2006-06-23 17:05:07 +08:00
|
|
|
|
2011-02-28 22:57:17 +08:00
|
|
|
/* whether file capabilities are enabled */
|
|
|
|
static ssize_t fscaps_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%d\n", file_caps_enabled);
|
2011-02-28 22:57:17 +08:00
|
|
|
}
|
|
|
|
KERNEL_ATTR_RO(fscaps);
|
|
|
|
|
2015-12-08 05:09:52 +08:00
|
|
|
#ifndef CONFIG_TINY_RCU
|
2012-10-05 14:59:15 +08:00
|
|
|
int rcu_expedited;
|
|
|
|
static ssize_t rcu_expedited_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%d\n", READ_ONCE(rcu_expedited));
|
2012-10-05 14:59:15 +08:00
|
|
|
}
|
|
|
|
static ssize_t rcu_expedited_store(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
if (kstrtoint(buf, 0, &rcu_expedited))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RW(rcu_expedited);
|
|
|
|
|
2015-11-25 07:44:06 +08:00
|
|
|
int rcu_normal;
|
|
|
|
static ssize_t rcu_normal_show(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr, char *buf)
|
|
|
|
{
|
2023-03-24 23:40:41 +08:00
|
|
|
return sysfs_emit(buf, "%d\n", READ_ONCE(rcu_normal));
|
2015-11-25 07:44:06 +08:00
|
|
|
}
|
|
|
|
static ssize_t rcu_normal_store(struct kobject *kobj,
|
|
|
|
struct kobj_attribute *attr,
|
|
|
|
const char *buf, size_t count)
|
|
|
|
{
|
|
|
|
if (kstrtoint(buf, 0, &rcu_normal))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
KERNEL_ATTR_RW(rcu_normal);
|
2015-12-08 05:09:52 +08:00
|
|
|
#endif /* #ifndef CONFIG_TINY_RCU */
|
2015-11-25 07:44:06 +08:00
|
|
|
|
2007-07-19 16:48:39 +08:00
|
|
|
/*
|
|
|
|
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
|
|
|
|
*/
|
2024-04-16 00:20:44 +08:00
|
|
|
extern const void __start_notes;
|
|
|
|
extern const void __stop_notes;
|
2007-07-19 16:48:39 +08:00
|
|
|
#define notes_size (&__stop_notes - &__start_notes)
|
|
|
|
|
2010-05-13 09:28:57 +08:00
|
|
|
static ssize_t notes_read(struct file *filp, struct kobject *kobj,
|
|
|
|
struct bin_attribute *bin_attr,
|
2007-07-19 16:48:39 +08:00
|
|
|
char *buf, loff_t off, size_t count)
|
|
|
|
{
|
|
|
|
memcpy(buf, &__start_notes + off, count);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2017-02-25 07:00:46 +08:00
|
|
|
/*
 * Binary attribute named "notes", world-readable (S_IRUGO), served by
 * notes_read().  It is marked __ro_after_init.
 * NOTE(review): .size is not set in this initializer; presumably it is
 * filled in from notes_size during init - confirm.
 */
static struct bin_attribute notes_attr __ro_after_init  = {
	.attr = {
		.name = "notes",
		.mode = S_IRUGO,
	},
	.read = &notes_read,
};
|
|
|
|
|
2007-11-07 02:36:58 +08:00
|
|
|
/* Global kobject pointer, exported to GPL-compatible modules. */
struct kobject *kernel_kobj;
EXPORT_SYMBOL_GPL(kernel_kobj);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
static struct attribute * kernel_attrs[] = {
|
2011-02-28 22:57:17 +08:00
|
|
|
&fscaps_attr.attr,
|
2005-11-11 11:58:04 +08:00
|
|
|
&uevent_seqnum_attr.attr,
|
2022-11-03 23:24:07 +08:00
|
|
|
&cpu_byteorder_attr.attr,
|
2022-12-22 00:17:52 +08:00
|
|
|
&address_bits_attr.attr,
|
2014-04-11 05:09:31 +08:00
|
|
|
#ifdef CONFIG_UEVENT_HELPER
|
2005-11-11 11:58:04 +08:00
|
|
|
&uevent_helper_attr.attr,
|
2014-04-11 05:09:31 +08:00
|
|
|
#endif
|
2008-10-16 13:01:46 +08:00
|
|
|
#ifdef CONFIG_PROFILING
|
|
|
|
&profiling_attr.attr,
|
|
|
|
#endif
|
2015-09-10 06:38:55 +08:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2006-06-23 17:05:07 +08:00
|
|
|
&kexec_loaded_attr.attr,
|
2024-01-24 13:12:44 +08:00
|
|
|
#ifdef CONFIG_CRASH_DUMP
|
2006-06-23 17:05:07 +08:00
|
|
|
&kexec_crash_loaded_attr.attr,
|
2009-12-16 08:47:46 +08:00
|
|
|
&kexec_crash_size_attr.attr,
|
crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE
Patch series "kexec/fadump: remove dependency with CONFIG_KEXEC and
reuse crashkernel parameter for fadump", v4.
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be a reused, for memory reservation, by such
architecture specific infrastructure.
This patchset removes dependency with CONFIG_KEXEC for crashkernel
parameter and vmcoreinfo related code as it can be reused without kexec
support. Also, crashkernel parameter is reused instead of
fadump_reserve_mem to reserve memory for fadump.
The first patch moves crashkernel parameter parsing and vmcoreinfo
related code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The
second patch reuses the definitions of append_elf_note() & final_note()
functions under CONFIG_CRASH_CORE in IA64 arch code. The third patch
removes dependency on CONFIG_KEXEC for firmware-assisted dump (fadump)
in powerpc. The next patch reuses crashkernel parameter for reserving
memory for fadump, instead of the fadump_reserve_mem parameter. This
has the advantage of using all syntaxes crashkernel parameter supports,
for fadump as well. The last patch updates fadump kernel documentation
about use of crashkernel parameter.
This patch (of 5):
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be a reused, for memory reservation, by such
architecture specific infrastructure.
But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. There is no
functional change with this patch.
Link: http://lkml.kernel.org/r/149035338104.6881.4550894432615189948.stgit@hbathini.in.ibm.com
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-09 06:56:18 +08:00
|
|
|
#endif
|
2024-01-24 13:12:44 +08:00
|
|
|
#endif
|
2024-01-24 13:12:42 +08:00
|
|
|
#ifdef CONFIG_VMCORE_INFO
|
2007-10-17 14:27:27 +08:00
|
|
|
&vmcoreinfo_attr.attr,
|
crash: hotplug support for kexec_load()
The hotplug support for kexec_load() requires changes to the userspace
kexec-tools and a little extra help from the kernel.
Given a kdump capture kernel loaded via kexec_load(), and a subsequent
hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites
it to reflect the hotplug change. That is the desired outcome, however,
at kernel panic time, the purgatory integrity check fails (because the
elfcorehdr changed), and the capture kernel does not boot and no vmcore is
generated.
Therefore, the userspace kexec-tools/kexec must indicate to the kernel
that the elfcorehdr can be modified (because the kexec excluded the
elfcorehdr from the digest, and sized the elfcorehdr memory buffer
appropriately).
To facilitate hotplug support with kexec_load():
- a new kexec flag KEXEC_UPATE_ELFCOREHDR indicates that it is
safe for the kernel to modify the kexec_load()'d elfcorehdr
- the /sys/kernel/crash_elfcorehdr_size node communicates the
preferred size of the elfcorehdr memory buffer
- The sysfs crash_hotplug nodes (ie.
/sys/devices/system/[cpu|memory]/crash_hotplug) dynamically
take into account kexec_file_load() vs kexec_load() and
KEXEC_UPDATE_ELFCOREHDR.
This is critical so that the udev rule processing of crash_hotplug
is all that is needed to determine if the userspace unload-then-load
of the kdump image is to be skipped, or not. The proposed udev
rule change looks like:
# The kernel updates the crash elfcorehdr for CPU and memory changes
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
The table below indicates the behavior of kexec_load()'d kdump image
updates (with the new udev crash_hotplug rule in place):
Kernel |Kexec
-------+-----+----
Old |Old |New
| a | a
-------+-----+----
New | a | b
-------+-----+----
where kexec 'old' and 'new' delineate kexec-tools has the needed
modifications for the crash hotplug feature, and kernel 'old' and 'new'
delineate the kernel supports this crash hotplug feature.
Behavior 'a' indicates the unload-then-reload of the entire kdump image.
For the kexec 'old' column, the unload-then-reload occurs due to the
missing flag KEXEC_UPDATE_ELFCOREHDR. An 'old' kernel (with 'new' kexec)
does not present the crash_hotplug sysfs node, which leads to the
unload-then-reload of the kdump image.
Behavior 'b' indicates the desired optimized behavior of the kernel
directly modifying the elfcorehdr and avoiding the unload-then-reload of
the kdump image.
If the udev rule is not updated with crash_hotplug node check, then no
matter any combination of kernel or kexec is new or old, the kdump image
continues to be unload-then-reload on hotplug changes.
To fully support the crash hotplug feature, there needs to be a rollout of
kernel, kexec-tools and udev rule changes. However, the order of the
rollout of these pieces does not matter; kexec_load()'d kdump images still
function for hotplug as-is.
Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Suggested-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-08-15 05:44:44 +08:00
|
|
|
#ifdef CONFIG_CRASH_HOTPLUG
|
|
|
|
&crash_elfcorehdr_size_attr.attr,
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2015-12-08 05:09:52 +08:00
|
|
|
#ifndef CONFIG_TINY_RCU
|
2012-10-05 14:59:15 +08:00
|
|
|
&rcu_expedited_attr.attr,
|
2015-11-25 07:44:06 +08:00
|
|
|
&rcu_normal_attr.attr,
|
2015-12-08 05:09:52 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
2017-07-11 06:51:14 +08:00
|
|
|
static const struct attribute_group kernel_attr_group = {
|
2005-04-17 06:20:36 +08:00
|
|
|
.attrs = kernel_attrs,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * ksysfs_init - create the "kernel" kobject and populate it.
 *
 * Creates a kobject named "kernel" with no parent, adds the attributes of
 * kernel_attr_group to it and, when notes_size is positive, also creates a
 * binary file for notes_attr sized to notes_size.
 *
 * On failure, everything set up so far is undone in reverse order via the
 * labels at the end of the function.
 *
 * Returns 0 on success, -ENOMEM if the kobject could not be created, or
 * the error code returned by the failing sysfs call.
 */
static int __init ksysfs_init(void)
{
	int error;

	kernel_kobj = kobject_create_and_add("kernel", NULL);
	if (!kernel_kobj) {
		error = -ENOMEM;
		goto exit;
	}

	error = sysfs_create_group(kernel_kobj, &kernel_attr_group);
	if (error)
		goto kset_exit;

	/* The notes file is only created when there is something to expose. */
	if (notes_size > 0) {
		notes_attr.size = notes_size;
		error = sysfs_create_bin_file(kernel_kobj, &notes_attr);
		if (error)
			goto group_exit;
	}

	return 0;

	/* Unwind in reverse order of setup. */
group_exit:
	sysfs_remove_group(kernel_kobj, &kernel_attr_group);
kset_exit:
	kobject_put(kernel_kobj);
exit:
	return error;
}
|
|
|
|
|
|
|
|
/* Register ksysfs_init() to be invoked through the core_initcall() hook. */
core_initcall(ksysfs_init);
|