mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-23 20:53:53 +08:00
3404be67bf
Patch series "mm: Expand CONFIG_SLAB_FREELIST_HARDENED to include SLAB"
In reviewing Vlastimil Babka's latest slub debug series, I realized[1]
that several checks under CONFIG_SLAB_FREELIST_HARDENED weren't being
applied to SLAB. Fix this by expanding the Kconfig coverage, and adding a
simple double-free test for SLAB.
This patch (of 2):
Include SLAB caches when performing kmem_cache pointer verification. A
defense against such corruption[1] should be applied to all the
allocators. With this added, the "SLAB_FREE_CROSS" and "SLAB_FREE_PAGE"
LKDTM tests now pass on SLAB:
lkdtm: Performing direct entry SLAB_FREE_CROSS
lkdtm: Attempting cross-cache slab free ...
------------[ cut here ]------------
cache_from_obj: Wrong slab cache. lkdtm-heap-b but object is from lkdtm-heap-a
WARNING: CPU: 2 PID: 2195 at mm/slab.h:530 kmem_cache_free+0x8d/0x1d0
...
lkdtm: Performing direct entry SLAB_FREE_PAGE
lkdtm: Attempting non-Slab slab free ...
------------[ cut here ]------------
virt_to_cache: Object is not a Slab page!
WARNING: CPU: 1 PID: 2202 at mm/slab.h:489 kmem_cache_free+0x196/0x1d0
Additionally clean up neighboring Kconfig entries for clarity,
readability, and redundant option removal.
[1] https://github.com/ThomasKing2014/slides/raw/master/Building%20universal%20Android%20rooting%20with%20a%20type%20confusion%20vulnerability.pdf
Fixes: 598a0717a8
("mm/slab: validate cache membership under freelist hardening")
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Popov <alex.popov@linux.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Garrett <mjg59@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Link: http://lkml.kernel.org/r/20200625215548.389774-1-keescook@chromium.org
Link: http://lkml.kernel.org/r/20200625215548.389774-2-keescook@chromium.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2345 lines
75 KiB
Plaintext
2345 lines
75 KiB
Plaintext
# SPDX-License-Identifier: GPL-2.0-only
|
|
config DEFCONFIG_LIST
|
|
string
|
|
depends on !UML
|
|
option defconfig_list
|
|
default "/lib/modules/$(shell,uname -r)/.config"
|
|
default "/etc/kernel-config"
|
|
default "/boot/config-$(shell,uname -r)"
|
|
default "arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)"
|
|
|
|
config CC_VERSION_TEXT
|
|
string
|
|
default "$(CC_VERSION_TEXT)"
|
|
help
|
|
This is used in unclear ways:
|
|
|
|
- Re-run Kconfig when the compiler is updated
|
|
The 'default' property references the environment variable,
|
|
CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
|
|
When the compiler is updated, Kconfig will be invoked.
|
|
|
|
- Ensure full rebuild when the compier is updated
|
|
include/linux/kconfig.h contains this option in the comment line so
|
|
fixdep adds include/config/cc/version/text.h into the auto-generated
|
|
dependency. When the compiler is updated, syncconfig will touch it
|
|
and then every file will be rebuilt.
|
|
|
|
config CC_IS_GCC
|
|
def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q gcc)
|
|
|
|
config GCC_VERSION
|
|
int
|
|
default $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) if CC_IS_GCC
|
|
default 0
|
|
|
|
config LD_VERSION
|
|
int
|
|
default $(shell,$(LD) --version | $(srctree)/scripts/ld-version.sh)
|
|
|
|
config CC_IS_CLANG
|
|
def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q clang)
|
|
|
|
config LD_IS_LLD
|
|
def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
|
|
|
|
config CLANG_VERSION
|
|
int
|
|
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
|
|
|
|
config CC_CAN_LINK
|
|
bool
|
|
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
|
|
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag))
|
|
|
|
config CC_CAN_LINK_STATIC
|
|
bool
|
|
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag) -static) if 64BIT
|
|
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag) -static)
|
|
|
|
config CC_HAS_ASM_GOTO
|
|
def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
|
|
|
|
config CC_HAS_ASM_GOTO_OUTPUT
|
|
depends on CC_HAS_ASM_GOTO
|
|
def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
|
|
|
|
config TOOLS_SUPPORT_RELR
|
|
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
|
|
|
|
config CC_HAS_ASM_INLINE
|
|
def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
|
|
|
|
config CONSTRUCTORS
|
|
bool
|
|
depends on !UML
|
|
|
|
config IRQ_WORK
|
|
bool
|
|
|
|
config BUILDTIME_TABLE_SORT
|
|
bool
|
|
|
|
config THREAD_INFO_IN_TASK
|
|
bool
|
|
help
|
|
Select this to move thread_info off the stack into task_struct. To
|
|
make this work, an arch will need to remove all thread_info fields
|
|
except flags and fix any runtime bugs.
|
|
|
|
One subtle change that will be needed is to use try_get_task_stack()
|
|
and put_task_stack() in save_thread_stack_tsk() and get_wchan().
|
|
|
|
menu "General setup"
|
|
|
|
config BROKEN
|
|
bool
|
|
|
|
config BROKEN_ON_SMP
|
|
bool
|
|
depends on BROKEN || !SMP
|
|
default y
|
|
|
|
config INIT_ENV_ARG_LIMIT
|
|
int
|
|
default 32 if !UML
|
|
default 128 if UML
|
|
help
|
|
Maximum of each of the number of arguments and environment
|
|
variables passed to init from the kernel command line.
|
|
|
|
config COMPILE_TEST
|
|
bool "Compile also drivers which will not load"
|
|
depends on !UML
|
|
default n
|
|
help
|
|
Some drivers can be compiled on a different platform than they are
|
|
intended to be run on. Despite they cannot be loaded there (or even
|
|
when they load they cannot be used due to missing HW support),
|
|
developers still, opposing to distributors, might want to build such
|
|
drivers to compile-test them.
|
|
|
|
If you are a developer and want to build everything available, say Y
|
|
here. If you are a user/distributor, say N here to exclude useless
|
|
drivers to be distributed.
|
|
|
|
config UAPI_HEADER_TEST
|
|
bool "Compile test UAPI headers"
|
|
depends on HEADERS_INSTALL && CC_CAN_LINK
|
|
help
|
|
Compile test headers exported to user-space to ensure they are
|
|
self-contained, i.e. compilable as standalone units.
|
|
|
|
If you are a developer or tester and want to ensure the exported
|
|
headers are self-contained, say Y here. Otherwise, choose N.
|
|
|
|
config LOCALVERSION
|
|
string "Local version - append to kernel release"
|
|
help
|
|
Append an extra string to the end of your kernel version.
|
|
This will show up when you type uname, for example.
|
|
The string you set here will be appended after the contents of
|
|
any files with a filename matching localversion* in your
|
|
object and source tree, in that order. Your total string can
|
|
be a maximum of 64 characters.
|
|
|
|
config LOCALVERSION_AUTO
|
|
bool "Automatically append version information to the version string"
|
|
default y
|
|
depends on !COMPILE_TEST
|
|
help
|
|
This will try to automatically determine if the current tree is a
|
|
release tree by looking for git tags that belong to the current
|
|
top of tree revision.
|
|
|
|
A string of the format -gxxxxxxxx will be added to the localversion
|
|
if a git-based tree is found. The string generated by this will be
|
|
appended after any matching localversion* files, and after the value
|
|
set in CONFIG_LOCALVERSION.
|
|
|
|
(The actual string used here is the first eight characters produced
|
|
by running the command:
|
|
|
|
$ git rev-parse --verify HEAD
|
|
|
|
which is done within the script "scripts/setlocalversion".)
|
|
|
|
config BUILD_SALT
|
|
string "Build ID Salt"
|
|
default ""
|
|
help
|
|
The build ID is used to link binaries and their debug info. Setting
|
|
this option will use the value in the calculation of the build id.
|
|
This is mostly useful for distributions which want to ensure the
|
|
build is unique between builds. It's safe to leave the default.
|
|
|
|
config HAVE_KERNEL_GZIP
|
|
bool
|
|
|
|
config HAVE_KERNEL_BZIP2
|
|
bool
|
|
|
|
config HAVE_KERNEL_LZMA
|
|
bool
|
|
|
|
config HAVE_KERNEL_XZ
|
|
bool
|
|
|
|
config HAVE_KERNEL_LZO
|
|
bool
|
|
|
|
config HAVE_KERNEL_LZ4
|
|
bool
|
|
|
|
config HAVE_KERNEL_ZSTD
|
|
bool
|
|
|
|
config HAVE_KERNEL_UNCOMPRESSED
|
|
bool
|
|
|
|
choice
|
|
prompt "Kernel compression mode"
|
|
default KERNEL_GZIP
|
|
depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD || HAVE_KERNEL_UNCOMPRESSED
|
|
help
|
|
The linux kernel is a kind of self-extracting executable.
|
|
Several compression algorithms are available, which differ
|
|
in efficiency, compression and decompression speed.
|
|
Compression speed is only relevant when building a kernel.
|
|
Decompression speed is relevant at each boot.
|
|
|
|
If you have any problems with bzip2 or lzma compressed
|
|
kernels, mail me (Alain Knaff) <alain@knaff.lu>. (An older
|
|
version of this functionality (bzip2 only), for 2.4, was
|
|
supplied by Christian Ludwig)
|
|
|
|
High compression options are mostly useful for users, who
|
|
are low on disk space (embedded systems), but for whom ram
|
|
size matters less.
|
|
|
|
If in doubt, select 'gzip'
|
|
|
|
config KERNEL_GZIP
|
|
bool "Gzip"
|
|
depends on HAVE_KERNEL_GZIP
|
|
help
|
|
The old and tried gzip compression. It provides a good balance
|
|
between compression ratio and decompression speed.
|
|
|
|
config KERNEL_BZIP2
|
|
bool "Bzip2"
|
|
depends on HAVE_KERNEL_BZIP2
|
|
help
|
|
Its compression ratio and speed is intermediate.
|
|
Decompression speed is slowest among the choices. The kernel
|
|
size is about 10% smaller with bzip2, in comparison to gzip.
|
|
Bzip2 uses a large amount of memory. For modern kernels you
|
|
will need at least 8MB RAM or more for booting.
|
|
|
|
config KERNEL_LZMA
|
|
bool "LZMA"
|
|
depends on HAVE_KERNEL_LZMA
|
|
help
|
|
This compression algorithm's ratio is best. Decompression speed
|
|
is between gzip and bzip2. Compression is slowest.
|
|
The kernel size is about 33% smaller with LZMA in comparison to gzip.
|
|
|
|
config KERNEL_XZ
|
|
bool "XZ"
|
|
depends on HAVE_KERNEL_XZ
|
|
help
|
|
XZ uses the LZMA2 algorithm and instruction set specific
|
|
BCJ filters which can improve compression ratio of executable
|
|
code. The size of the kernel is about 30% smaller with XZ in
|
|
comparison to gzip. On architectures for which there is a BCJ
|
|
filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
|
|
will create a few percent smaller kernel than plain LZMA.
|
|
|
|
The speed is about the same as with LZMA: The decompression
|
|
speed of XZ is better than that of bzip2 but worse than gzip
|
|
and LZO. Compression is slow.
|
|
|
|
config KERNEL_LZO
|
|
bool "LZO"
|
|
depends on HAVE_KERNEL_LZO
|
|
help
|
|
Its compression ratio is the poorest among the choices. The kernel
|
|
size is about 10% bigger than gzip; however its speed
|
|
(both compression and decompression) is the fastest.
|
|
|
|
config KERNEL_LZ4
|
|
bool "LZ4"
|
|
depends on HAVE_KERNEL_LZ4
|
|
help
|
|
LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
|
|
A preliminary version of LZ4 de/compression tool is available at
|
|
<https://code.google.com/p/lz4/>.
|
|
|
|
Its compression ratio is worse than LZO. The size of the kernel
|
|
is about 8% bigger than LZO. But the decompression speed is
|
|
faster than LZO.
|
|
|
|
config KERNEL_ZSTD
|
|
bool "ZSTD"
|
|
depends on HAVE_KERNEL_ZSTD
|
|
help
|
|
ZSTD is a compression algorithm targeting intermediate compression
|
|
with fast decompression speed. It will compress better than GZIP and
|
|
decompress around the same speed as LZO, but slower than LZ4. You
|
|
will need at least 192 KB RAM or more for booting. The zstd command
|
|
line tool is required for compression.
|
|
|
|
config KERNEL_UNCOMPRESSED
|
|
bool "None"
|
|
depends on HAVE_KERNEL_UNCOMPRESSED
|
|
help
|
|
Produce uncompressed kernel image. This option is usually not what
|
|
you want. It is useful for debugging the kernel in slow simulation
|
|
environments, where decompressing and moving the kernel is awfully
|
|
slow. This option allows early boot code to skip the decompressor
|
|
and jump right at uncompressed kernel image.
|
|
|
|
endchoice
|
|
|
|
config DEFAULT_INIT
|
|
string "Default init path"
|
|
default ""
|
|
help
|
|
This option determines the default init for the system if no init=
|
|
option is passed on the kernel command line. If the requested path is
|
|
not present, we will still then move on to attempting further
|
|
locations (e.g. /sbin/init, etc). If this is empty, we will just use
|
|
the fallback list when init= is not passed.
|
|
|
|
config DEFAULT_HOSTNAME
|
|
string "Default hostname"
|
|
default "(none)"
|
|
help
|
|
This option determines the default system hostname before userspace
|
|
calls sethostname(2). The kernel traditionally uses "(none)" here,
|
|
but you may wish to use a different default here to make a minimal
|
|
system more usable with less configuration.
|
|
|
|
#
|
|
# For some reason microblaze and nios2 hard code SWAP=n. Hopefully we can
|
|
# add proper SWAP support to them, in which case this can be remove.
|
|
#
|
|
config ARCH_NO_SWAP
|
|
bool
|
|
|
|
config SWAP
|
|
bool "Support for paging of anonymous memory (swap)"
|
|
depends on MMU && BLOCK && !ARCH_NO_SWAP
|
|
default y
|
|
help
|
|
This option allows you to choose whether you want to have support
|
|
for so called swap devices or swap files in your kernel that are
|
|
used to provide more virtual memory than the actual RAM present
|
|
in your computer. If unsure say Y.
|
|
|
|
config SYSVIPC
|
|
bool "System V IPC"
|
|
help
|
|
Inter Process Communication is a suite of library functions and
|
|
system calls which let processes (running programs) synchronize and
|
|
exchange information. It is generally considered to be a good thing,
|
|
and some programs won't run unless you say Y here. In particular, if
|
|
you want to run the DOS emulator dosemu under Linux (read the
|
|
DOSEMU-HOWTO, available from <http://www.tldp.org/docs.html#howto>),
|
|
you'll need to say Y here.
|
|
|
|
You can find documentation about IPC with "info ipc" and also in
|
|
section 6.4 of the Linux Programmer's Guide, available from
|
|
<http://www.tldp.org/guides.html>.
|
|
|
|
config SYSVIPC_SYSCTL
|
|
bool
|
|
depends on SYSVIPC
|
|
depends on SYSCTL
|
|
default y
|
|
|
|
config POSIX_MQUEUE
|
|
bool "POSIX Message Queues"
|
|
depends on NET
|
|
help
|
|
POSIX variant of message queues is a part of IPC. In POSIX message
|
|
queues every message has a priority which decides about succession
|
|
of receiving it by a process. If you want to compile and run
|
|
programs written e.g. for Solaris with use of its POSIX message
|
|
queues (functions mq_*) say Y here.
|
|
|
|
POSIX message queues are visible as a filesystem called 'mqueue'
|
|
and can be mounted somewhere if you want to do filesystem
|
|
operations on message queues.
|
|
|
|
If unsure, say Y.
|
|
|
|
config POSIX_MQUEUE_SYSCTL
|
|
bool
|
|
depends on POSIX_MQUEUE
|
|
depends on SYSCTL
|
|
default y
|
|
|
|
config WATCH_QUEUE
|
|
bool "General notification queue"
|
|
default n
|
|
help
|
|
|
|
This is a general notification queue for the kernel to pass events to
|
|
userspace by splicing them into pipes. It can be used in conjunction
|
|
with watches for key/keyring change notifications and device
|
|
notifications.
|
|
|
|
See Documentation/watch_queue.rst
|
|
|
|
config CROSS_MEMORY_ATTACH
|
|
bool "Enable process_vm_readv/writev syscalls"
|
|
depends on MMU
|
|
default y
|
|
help
|
|
Enabling this option adds the system calls process_vm_readv and
|
|
process_vm_writev which allow a process with the correct privileges
|
|
to directly read from or write to another process' address space.
|
|
See the man page for more details.
|
|
|
|
config USELIB
|
|
bool "uselib syscall"
|
|
def_bool ALPHA || M68K || SPARC || X86_32 || IA32_EMULATION
|
|
help
|
|
This option enables the uselib syscall, a system call used in the
|
|
dynamic linker from libc5 and earlier. glibc does not use this
|
|
system call. If you intend to run programs built on libc5 or
|
|
earlier, you may need to enable this syscall. Current systems
|
|
running glibc can safely disable this.
|
|
|
|
config AUDIT
|
|
bool "Auditing support"
|
|
depends on NET
|
|
help
|
|
Enable auditing infrastructure that can be used with another
|
|
kernel subsystem, such as SELinux (which requires this for
|
|
logging of avc messages output). System call auditing is included
|
|
on architectures which support it.
|
|
|
|
config HAVE_ARCH_AUDITSYSCALL
|
|
bool
|
|
|
|
config AUDITSYSCALL
|
|
def_bool y
|
|
depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
|
|
select FSNOTIFY
|
|
|
|
source "kernel/irq/Kconfig"
|
|
source "kernel/time/Kconfig"
|
|
source "kernel/Kconfig.preempt"
|
|
|
|
menu "CPU/Task time and stats accounting"
|
|
|
|
config VIRT_CPU_ACCOUNTING
|
|
bool
|
|
|
|
choice
|
|
prompt "Cputime accounting"
|
|
default TICK_CPU_ACCOUNTING if !PPC64
|
|
default VIRT_CPU_ACCOUNTING_NATIVE if PPC64
|
|
|
|
# Kind of a stub config for the pure tick based cputime accounting
|
|
config TICK_CPU_ACCOUNTING
|
|
bool "Simple tick based cputime accounting"
|
|
depends on !S390 && !NO_HZ_FULL
|
|
help
|
|
This is the basic tick based cputime accounting that maintains
|
|
statistics about user, system and idle time spent on per jiffies
|
|
granularity.
|
|
|
|
If unsure, say Y.
|
|
|
|
config VIRT_CPU_ACCOUNTING_NATIVE
|
|
bool "Deterministic task and CPU time accounting"
|
|
depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL
|
|
select VIRT_CPU_ACCOUNTING
|
|
help
|
|
Select this option to enable more accurate task and CPU time
|
|
accounting. This is done by reading a CPU counter on each
|
|
kernel entry and exit and on transitions within the kernel
|
|
between system, softirq and hardirq state, so there is a
|
|
small performance impact. In the case of s390 or IBM POWER > 5,
|
|
this also enables accounting of stolen time on logically-partitioned
|
|
systems.
|
|
|
|
config VIRT_CPU_ACCOUNTING_GEN
|
|
bool "Full dynticks CPU time accounting"
|
|
depends on HAVE_CONTEXT_TRACKING
|
|
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
|
|
depends on GENERIC_CLOCKEVENTS
|
|
select VIRT_CPU_ACCOUNTING
|
|
select CONTEXT_TRACKING
|
|
help
|
|
Select this option to enable task and CPU time accounting on full
|
|
dynticks systems. This accounting is implemented by watching every
|
|
kernel-user boundaries using the context tracking subsystem.
|
|
The accounting is thus performed at the expense of some significant
|
|
overhead.
|
|
|
|
For now this is only useful if you are working on the full
|
|
dynticks subsystem development.
|
|
|
|
If unsure, say N.
|
|
|
|
endchoice
|
|
|
|
config IRQ_TIME_ACCOUNTING
|
|
bool "Fine granularity task level IRQ time accounting"
|
|
depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
|
|
help
|
|
Select this option to enable fine granularity task irq time
|
|
accounting. This is done by reading a timestamp on each
|
|
transitions between softirq and hardirq state, so there can be a
|
|
small performance impact.
|
|
|
|
If in doubt, say N here.
|
|
|
|
config HAVE_SCHED_AVG_IRQ
|
|
def_bool y
|
|
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
|
|
depends on SMP
|
|
|
|
config SCHED_THERMAL_PRESSURE
|
|
bool
|
|
default y if ARM && ARM_CPU_TOPOLOGY
|
|
default y if ARM64
|
|
depends on SMP
|
|
depends on CPU_FREQ_THERMAL
|
|
help
|
|
Select this option to enable thermal pressure accounting in the
|
|
scheduler. Thermal pressure is the value conveyed to the scheduler
|
|
that reflects the reduction in CPU compute capacity resulted from
|
|
thermal throttling. Thermal throttling occurs when the performance of
|
|
a CPU is capped due to high operating temperatures.
|
|
|
|
If selected, the scheduler will be able to balance tasks accordingly,
|
|
i.e. put less load on throttled CPUs than on non/less throttled ones.
|
|
|
|
This requires the architecture to implement
|
|
arch_set_thermal_pressure() and arch_get_thermal_pressure().
|
|
|
|
config BSD_PROCESS_ACCT
|
|
bool "BSD Process Accounting"
|
|
depends on MULTIUSER
|
|
help
|
|
If you say Y here, a user level program will be able to instruct the
|
|
kernel (via a special system call) to write process accounting
|
|
information to a file: whenever a process exits, information about
|
|
that process will be appended to the file by the kernel. The
|
|
information includes things such as creation time, owning user,
|
|
command name, memory usage, controlling terminal etc. (the complete
|
|
list is in the struct acct in <file:include/linux/acct.h>). It is
|
|
up to the user level program to do useful things with this
|
|
information. This is generally a good idea, so say Y.
|
|
|
|
config BSD_PROCESS_ACCT_V3
|
|
bool "BSD Process Accounting version 3 file format"
|
|
depends on BSD_PROCESS_ACCT
|
|
default n
|
|
help
|
|
If you say Y here, the process accounting information is written
|
|
in a new file format that also logs the process IDs of each
|
|
process and its parent. Note that this file format is incompatible
|
|
with previous v0/v1/v2 file formats, so you will need updated tools
|
|
for processing it. A preliminary version of these tools is available
|
|
at <http://www.gnu.org/software/acct/>.
|
|
|
|
config TASKSTATS
|
|
bool "Export task/process statistics through netlink"
|
|
depends on NET
|
|
depends on MULTIUSER
|
|
default n
|
|
help
|
|
Export selected statistics for tasks/processes through the
|
|
generic netlink interface. Unlike BSD process accounting, the
|
|
statistics are available during the lifetime of tasks/processes as
|
|
responses to commands. Like BSD accounting, they are sent to user
|
|
space on task exit.
|
|
|
|
Say N if unsure.
|
|
|
|
config TASK_DELAY_ACCT
|
|
bool "Enable per-task delay accounting"
|
|
depends on TASKSTATS
|
|
select SCHED_INFO
|
|
help
|
|
Collect information on time spent by a task waiting for system
|
|
resources like cpu, synchronous block I/O completion and swapping
|
|
in pages. Such statistics can help in setting a task's priorities
|
|
relative to other tasks for cpu, io, rss limits etc.
|
|
|
|
Say N if unsure.
|
|
|
|
config TASK_XACCT
|
|
bool "Enable extended accounting over taskstats"
|
|
depends on TASKSTATS
|
|
help
|
|
Collect extended task accounting data and send the data
|
|
to userland for processing over the taskstats interface.
|
|
|
|
Say N if unsure.
|
|
|
|
config TASK_IO_ACCOUNTING
|
|
bool "Enable per-task storage I/O accounting"
|
|
depends on TASK_XACCT
|
|
help
|
|
Collect information on the number of bytes of storage I/O which this
|
|
task has caused.
|
|
|
|
Say N if unsure.
|
|
|
|
config PSI
|
|
bool "Pressure stall information tracking"
|
|
help
|
|
Collect metrics that indicate how overcommitted the CPU, memory,
|
|
and IO capacity are in the system.
|
|
|
|
If you say Y here, the kernel will create /proc/pressure/ with the
|
|
pressure statistics files cpu, memory, and io. These will indicate
|
|
the share of walltime in which some or all tasks in the system are
|
|
delayed due to contention of the respective resource.
|
|
|
|
In kernels with cgroup support, cgroups (cgroup2 only) will
|
|
have cpu.pressure, memory.pressure, and io.pressure files,
|
|
which aggregate pressure stalls for the grouped tasks only.
|
|
|
|
For more details see Documentation/accounting/psi.rst.
|
|
|
|
Say N if unsure.
|
|
|
|
config PSI_DEFAULT_DISABLED
|
|
bool "Require boot parameter to enable pressure stall information tracking"
|
|
default n
|
|
depends on PSI
|
|
help
|
|
If set, pressure stall information tracking will be disabled
|
|
per default but can be enabled through passing psi=1 on the
|
|
kernel commandline during boot.
|
|
|
|
This feature adds some code to the task wakeup and sleep
|
|
paths of the scheduler. The overhead is too low to affect
|
|
common scheduling-intense workloads in practice (such as
|
|
webservers, memcache), but it does show up in artificial
|
|
scheduler stress tests, such as hackbench.
|
|
|
|
If you are paranoid and not sure what the kernel will be
|
|
used for, say Y.
|
|
|
|
Say N if unsure.
|
|
|
|
endmenu # "CPU/Task time and stats accounting"
|
|
|
|
config CPU_ISOLATION
|
|
bool "CPU isolation"
|
|
depends on SMP || COMPILE_TEST
|
|
default y
|
|
help
|
|
Make sure that CPUs running critical tasks are not disturbed by
|
|
any source of "noise" such as unbound workqueues, timers, kthreads...
|
|
Unbound jobs get offloaded to housekeeping CPUs. This is driven by
|
|
the "isolcpus=" boot parameter.
|
|
|
|
Say Y if unsure.
|
|
|
|
source "kernel/rcu/Kconfig"
|
|
|
|
config BUILD_BIN2C
|
|
bool
|
|
default n
|
|
|
|
config IKCONFIG
|
|
tristate "Kernel .config support"
|
|
help
|
|
This option enables the complete Linux kernel ".config" file
|
|
contents to be saved in the kernel. It provides documentation
|
|
of which kernel options are used in a running kernel or in an
|
|
on-disk kernel. This information can be extracted from the kernel
|
|
image file with the script scripts/extract-ikconfig and used as
|
|
input to rebuild the current kernel or to build another kernel.
|
|
It can also be extracted from a running kernel by reading
|
|
/proc/config.gz if enabled (below).
|
|
|
|
config IKCONFIG_PROC
|
|
bool "Enable access to .config through /proc/config.gz"
|
|
depends on IKCONFIG && PROC_FS
|
|
help
|
|
This option enables access to the kernel configuration file
|
|
through /proc/config.gz.
|
|
|
|
config IKHEADERS
|
|
tristate "Enable kernel headers through /sys/kernel/kheaders.tar.xz"
|
|
depends on SYSFS
|
|
help
|
|
This option enables access to the in-kernel headers that are generated during
|
|
the build process. These can be used to build eBPF tracing programs,
|
|
or similar programs. If you build the headers as a module, a module called
|
|
kheaders.ko is built which can be loaded on-demand to get access to headers.
|
|
|
|
config LOG_BUF_SHIFT
|
|
int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
|
|
range 12 25
|
|
default 17
|
|
depends on PRINTK
|
|
help
|
|
Select the minimal kernel log buffer size as a power of 2.
|
|
The final size is affected by LOG_CPU_MAX_BUF_SHIFT config
|
|
parameter, see below. Any higher size also might be forced
|
|
by "log_buf_len" boot parameter.
|
|
|
|
Examples:
|
|
17 => 128 KB
|
|
16 => 64 KB
|
|
15 => 32 KB
|
|
14 => 16 KB
|
|
13 => 8 KB
|
|
12 => 4 KB
|
|
|
|
config LOG_CPU_MAX_BUF_SHIFT
|
|
int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
|
|
depends on SMP
|
|
range 0 21
|
|
default 12 if !BASE_SMALL
|
|
default 0 if BASE_SMALL
|
|
depends on PRINTK
|
|
help
|
|
This option allows to increase the default ring buffer size
|
|
according to the number of CPUs. The value defines the contribution
|
|
of each CPU as a power of 2. The used space is typically only few
|
|
lines however it might be much more when problems are reported,
|
|
e.g. backtraces.
|
|
|
|
The increased size means that a new buffer has to be allocated and
|
|
the original static one is unused. It makes sense only on systems
|
|
with more CPUs. Therefore this value is used only when the sum of
|
|
contributions is greater than the half of the default kernel ring
|
|
buffer as defined by LOG_BUF_SHIFT. The default values are set
|
|
so that more than 64 CPUs are needed to trigger the allocation.
|
|
|
|
Also this option is ignored when "log_buf_len" kernel parameter is
|
|
used as it forces an exact (power of two) size of the ring buffer.
|
|
|
|
The number of possible CPUs is used for this computation ignoring
|
|
hotplugging making the computation optimal for the worst case
|
|
scenario while allowing a simple algorithm to be used from bootup.
|
|
|
|
Examples shift values and their meaning:
|
|
17 => 128 KB for each CPU
|
|
16 => 64 KB for each CPU
|
|
15 => 32 KB for each CPU
|
|
14 => 16 KB for each CPU
|
|
13 => 8 KB for each CPU
|
|
12 => 4 KB for each CPU
|
|
|
|
config PRINTK_SAFE_LOG_BUF_SHIFT
|
|
int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)"
|
|
range 10 21
|
|
default 13
|
|
depends on PRINTK
|
|
help
|
|
Select the size of an alternate printk per-CPU buffer where messages
|
|
printed from usafe contexts are temporary stored. One example would
|
|
be NMI messages, another one - printk recursion. The messages are
|
|
copied to the main log buffer in a safe context to avoid a deadlock.
|
|
The value defines the size as a power of 2.
|
|
|
|
Those messages are rare and limited. The largest one is when
|
|
a backtrace is printed. It usually fits into 4KB. Select
|
|
8KB if you want to be on the safe side.
|
|
|
|
Examples:
|
|
17 => 128 KB for each CPU
|
|
16 => 64 KB for each CPU
|
|
15 => 32 KB for each CPU
|
|
14 => 16 KB for each CPU
|
|
13 => 8 KB for each CPU
|
|
12 => 4 KB for each CPU
|
|
|
|
#
|
|
# Architectures with an unreliable sched_clock() should select this:
|
|
#
|
|
config HAVE_UNSTABLE_SCHED_CLOCK
|
|
bool
|
|
|
|
config GENERIC_SCHED_CLOCK
|
|
bool
|
|
|
|
menu "Scheduler features"
|
|
|
|
config UCLAMP_TASK
|
|
bool "Enable utilization clamping for RT/FAIR tasks"
|
|
depends on CPU_FREQ_GOV_SCHEDUTIL
|
|
help
|
|
This feature enables the scheduler to track the clamped utilization
|
|
of each CPU based on RUNNABLE tasks scheduled on that CPU.
|
|
|
|
With this option, the user can specify the min and max CPU
|
|
utilization allowed for RUNNABLE tasks. The max utilization defines
|
|
the maximum frequency a task should use while the min utilization
|
|
defines the minimum frequency it should use.
|
|
|
|
Both min and max utilization clamp values are hints to the scheduler,
|
|
aiming at improving its frequency selection policy, but they do not
|
|
enforce or grant any specific bandwidth for tasks.
|
|
|
|
If in doubt, say N.
|
|
|
|
config UCLAMP_BUCKETS_COUNT
|
|
int "Number of supported utilization clamp buckets"
|
|
range 5 20
|
|
default 5
|
|
depends on UCLAMP_TASK
|
|
help
|
|
Defines the number of clamp buckets to use. The range of each bucket
|
|
will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the
|
|
number of clamp buckets the finer their granularity and the higher
|
|
the precision of clamping aggregation and tracking at run-time.
|
|
|
|
For example, with the minimum configuration value we will have 5
|
|
clamp buckets tracking 20% utilization each. A 25% boosted tasks will
|
|
be refcounted in the [20..39]% bucket and will set the bucket clamp
|
|
effective value to 25%.
|
|
If a second 30% boosted task should be co-scheduled on the same CPU,
|
|
that task will be refcounted in the same bucket of the first task and
|
|
it will boost the bucket clamp effective value to 30%.
|
|
The clamp effective value of a bucket is reset to its nominal value
|
|
(20% in the example above) when there are no more tasks refcounted in
|
|
that bucket.
|
|
|
|
An additional boost/capping margin can be added to some tasks. In the
|
|
example above the 25% task will be boosted to 30% until it exits the
|
|
CPU. If that should be considered not acceptable on certain systems,
|
|
it's always possible to reduce the margin by increasing the number of
|
|
clamp buckets to trade off used memory for run-time tracking
|
|
precision.
|
|
|
|
If in doubt, use the default value.
|
|
|
|
endmenu
|
|
|
|
#
|
|
# For architectures that want to enable the support for NUMA-affine scheduler
|
|
# balancing logic:
|
|
#
|
|
config ARCH_SUPPORTS_NUMA_BALANCING
|
|
bool
|
|
|
|
#
|
|
# For architectures that prefer to flush all TLBs after a number of pages
|
|
# are unmapped instead of sending one IPI per page to flush. The architecture
|
|
# must provide guarantees on what happens if a clean TLB cache entry is
|
|
# written after the unmap. Details are in mm/rmap.c near the check for
|
|
# should_defer_flush. The architecture should also consider if the full flush
|
|
# and the refill costs are offset by the savings of sending fewer IPIs.
|
|
config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
|
bool
|
|
|
|
config CC_HAS_INT128
|
|
def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
|
|
|
|
#
|
|
# For architectures that know their GCC __int128 support is sound
|
|
#
|
|
config ARCH_SUPPORTS_INT128
|
|
bool
|
|
|
|
# For architectures that (ab)use NUMA to represent different memory regions
|
|
# all cpu-local but of different latencies, such as SuperH.
|
|
#
|
|
config ARCH_WANT_NUMA_VARIABLE_LOCALITY
|
|
bool
|
|
|
|
config NUMA_BALANCING
|
|
bool "Memory placement aware NUMA scheduler"
|
|
depends on ARCH_SUPPORTS_NUMA_BALANCING
|
|
depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
|
|
depends on SMP && NUMA && MIGRATION
|
|
help
|
|
This option adds support for automatic NUMA aware memory/task placement.
|
|
The mechanism is quite primitive and is based on migrating memory when
|
|
it has references to the node the task is running on.
|
|
|
|
This system will be inactive on UMA systems.
|
|
|
|
config NUMA_BALANCING_DEFAULT_ENABLED
|
|
bool "Automatically enable NUMA aware memory/task placement"
|
|
default y
|
|
depends on NUMA_BALANCING
|
|
help
|
|
If set, automatic NUMA balancing will be enabled if running on a NUMA
|
|
machine.
|
|
|
|
menuconfig CGROUPS
|
|
bool "Control Group support"
|
|
select KERNFS
|
|
help
|
|
This option adds support for grouping sets of processes together, for
|
|
use with process control subsystems such as Cpusets, CFS, memory
|
|
controls or device isolation.
|
|
See
|
|
- Documentation/scheduler/sched-design-CFS.rst (CFS)
|
|
- Documentation/admin-guide/cgroup-v1/ (features for grouping, isolation
|
|
and resource control)
|
|
|
|
Say N if unsure.
|
|
|
|
if CGROUPS
|
|
|
|
config PAGE_COUNTER
|
|
bool
|
|
|
|
config MEMCG
|
|
bool "Memory controller"
|
|
select PAGE_COUNTER
|
|
select EVENTFD
|
|
help
|
|
Provides control over the memory footprint of tasks in a cgroup.
|
|
|
|
config MEMCG_SWAP
|
|
bool
|
|
depends on MEMCG && SWAP
|
|
default y
|
|
|
|
config MEMCG_KMEM
|
|
bool
|
|
depends on MEMCG && !SLOB
|
|
default y
|
|
|
|
config BLK_CGROUP
|
|
bool "IO controller"
|
|
depends on BLOCK
|
|
default n
|
|
help
|
|
Generic block IO controller cgroup interface. This is the common
|
|
cgroup interface which should be used by various IO controlling
|
|
policies.
|
|
|
|
Currently, CFQ IO scheduler uses it to recognize task groups and
|
|
control disk bandwidth allocation (proportional time slice allocation)
|
|
to such task groups. It is also used by bio throttling logic in
|
|
block layer to implement upper limit in IO rates on a device.
|
|
|
|
This option only enables generic Block IO controller infrastructure.
|
|
One needs to also enable actual IO controlling logic/policy. For
|
|
enabling proportional weight division of disk bandwidth in CFQ, set
|
|
CONFIG_BFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
|
|
CONFIG_BLK_DEV_THROTTLING=y.
|
|
|
|
See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
|
|
|
|
config CGROUP_WRITEBACK
|
|
bool
|
|
depends on MEMCG && BLK_CGROUP
|
|
default y
|
|
|
|
menuconfig CGROUP_SCHED
|
|
bool "CPU controller"
|
|
default n
|
|
help
|
|
This feature lets CPU scheduler recognize task groups and control CPU
|
|
bandwidth allocation to such task groups. It uses cgroups to group
|
|
tasks.
|
|
|
|
if CGROUP_SCHED
|
|
config FAIR_GROUP_SCHED
|
|
bool "Group scheduling for SCHED_OTHER"
|
|
depends on CGROUP_SCHED
|
|
default CGROUP_SCHED
|
|
|
|
config CFS_BANDWIDTH
|
|
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
|
|
depends on FAIR_GROUP_SCHED
|
|
default n
|
|
help
|
|
This option allows users to define CPU bandwidth rates (limits) for
|
|
tasks running within the fair group scheduler. Groups with no limit
|
|
set are considered to be unconstrained and will run with no
|
|
restriction.
|
|
See Documentation/scheduler/sched-bwc.rst for more information.
|
|
|
|
config RT_GROUP_SCHED
|
|
bool "Group scheduling for SCHED_RR/FIFO"
|
|
depends on CGROUP_SCHED
|
|
default n
|
|
help
|
|
This feature lets you explicitly allocate real CPU bandwidth
|
|
to task groups. If enabled, it will also make it impossible to
|
|
schedule realtime tasks for non-root users until you allocate
|
|
realtime bandwidth for them.
|
|
See Documentation/scheduler/sched-rt-group.rst for more information.
|
|
|
|
endif #CGROUP_SCHED
|
|
|
|
config UCLAMP_TASK_GROUP
|
|
bool "Utilization clamping per group of tasks"
|
|
depends on CGROUP_SCHED
|
|
depends on UCLAMP_TASK
|
|
default n
|
|
help
|
|
This feature enables the scheduler to track the clamped utilization
|
|
of each CPU based on RUNNABLE tasks currently scheduled on that CPU.
|
|
|
|
When this option is enabled, the user can specify a min and max
|
|
CPU bandwidth which is allowed for each single task in a group.
|
|
The max bandwidth allows to clamp the maximum frequency a task
|
|
can use, while the min bandwidth allows to define a minimum
|
|
frequency a task will always use.
|
|
|
|
When task group based utilization clamping is enabled, an eventually
|
|
specified task-specific clamp value is constrained by the cgroup
|
|
specified clamp value. Both minimum and maximum task clamping cannot
|
|
be bigger than the corresponding clamping defined at task group level.
|
|
|
|
If in doubt, say N.
|
|
|
|
config CGROUP_PIDS
|
|
bool "PIDs controller"
|
|
help
|
|
Provides enforcement of process number limits in the scope of a
|
|
cgroup. Any attempt to fork more processes than is allowed in the
|
|
cgroup will fail. PIDs are fundamentally a global resource because it
|
|
is fairly trivial to reach PID exhaustion before you reach even a
|
|
conservative kmemcg limit. As a result, it is possible to grind a
|
|
system to halt without being limited by other cgroup policies. The
|
|
PIDs controller is designed to stop this from happening.
|
|
|
|
It should be noted that organisational operations (such as attaching
|
|
to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
|
|
since the PIDs limit only affects a process's ability to fork, not to
|
|
attach to a cgroup.
|
|
|
|
config CGROUP_RDMA
|
|
bool "RDMA controller"
|
|
help
|
|
Provides enforcement of RDMA resources defined by IB stack.
|
|
It is fairly easy for consumers to exhaust RDMA resources, which
|
|
can result into resource unavailability to other consumers.
|
|
RDMA controller is designed to stop this from happening.
|
|
Attaching processes with active RDMA resources to the cgroup
|
|
hierarchy is allowed even if can cross the hierarchy's limit.
|
|
|
|
config CGROUP_FREEZER
|
|
bool "Freezer controller"
|
|
help
|
|
Provides a way to freeze and unfreeze all tasks in a
|
|
cgroup.
|
|
|
|
This option affects the ORIGINAL cgroup interface. The cgroup2 memory
|
|
controller includes important in-kernel memory consumers per default.
|
|
|
|
If you're using cgroup2, say N.
|
|
|
|
config CGROUP_HUGETLB
|
|
bool "HugeTLB controller"
|
|
depends on HUGETLB_PAGE
|
|
select PAGE_COUNTER
|
|
default n
|
|
help
|
|
Provides a cgroup controller for HugeTLB pages.
|
|
When you enable this, you can put a per cgroup limit on HugeTLB usage.
|
|
The limit is enforced during page fault. Since HugeTLB doesn't
|
|
support page reclaim, enforcing the limit at page fault time implies
|
|
that, the application will get SIGBUS signal if it tries to access
|
|
HugeTLB pages beyond its limit. This requires the application to know
|
|
beforehand how much HugeTLB pages it would require for its use. The
|
|
control group is tracked in the third page lru pointer. This means
|
|
that we cannot use the controller with huge page less than 3 pages.
|
|
|
|
config CPUSETS
|
|
bool "Cpuset controller"
|
|
depends on SMP
|
|
help
|
|
This option will let you create and manage CPUSETs which
|
|
allow dynamically partitioning a system into sets of CPUs and
|
|
Memory Nodes and assigning tasks to run only within those sets.
|
|
This is primarily useful on large SMP or NUMA systems.
|
|
|
|
Say N if unsure.
|
|
|
|
config PROC_PID_CPUSET
|
|
bool "Include legacy /proc/<pid>/cpuset file"
|
|
depends on CPUSETS
|
|
default y
|
|
|
|
config CGROUP_DEVICE
|
|
bool "Device controller"
|
|
help
|
|
Provides a cgroup controller implementing whitelists for
|
|
devices which a process in the cgroup can mknod or open.
|
|
|
|
config CGROUP_CPUACCT
|
|
bool "Simple CPU accounting controller"
|
|
help
|
|
Provides a simple controller for monitoring the
|
|
total CPU consumed by the tasks in a cgroup.
|
|
|
|
config CGROUP_PERF
|
|
bool "Perf controller"
|
|
depends on PERF_EVENTS
|
|
help
|
|
This option extends the perf per-cpu mode to restrict monitoring
|
|
to threads which belong to the cgroup specified and run on the
|
|
designated cpu. Or this can be used to have cgroup ID in samples
|
|
so that it can monitor performance events among cgroups.
|
|
|
|
Say N if unsure.
|
|
|
|
config CGROUP_BPF
|
|
bool "Support for eBPF programs attached to cgroups"
|
|
depends on BPF_SYSCALL
|
|
select SOCK_CGROUP_DATA
|
|
help
|
|
Allow attaching eBPF programs to a cgroup using the bpf(2)
|
|
syscall command BPF_PROG_ATTACH.
|
|
|
|
In which context these programs are accessed depends on the type
|
|
of attachment. For instance, programs that are attached using
|
|
BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
|
|
inet sockets.
|
|
|
|
config CGROUP_DEBUG
|
|
bool "Debug controller"
|
|
default n
|
|
depends on DEBUG_KERNEL
|
|
help
|
|
This option enables a simple controller that exports
|
|
debugging information about the cgroups framework. This
|
|
controller is for control cgroup debugging only. Its
|
|
interfaces are not stable.
|
|
|
|
Say N.
|
|
|
|
config SOCK_CGROUP_DATA
|
|
bool
|
|
default n
|
|
|
|
endif # CGROUPS
|
|
|
|
menuconfig NAMESPACES
|
|
bool "Namespaces support" if EXPERT
|
|
depends on MULTIUSER
|
|
default !EXPERT
|
|
help
|
|
Provides the way to make tasks work with different objects using
|
|
the same id. For example same IPC id may refer to different objects
|
|
or same user id or pid may refer to different tasks when used in
|
|
different namespaces.
|
|
|
|
if NAMESPACES
|
|
|
|
config UTS_NS
|
|
bool "UTS namespace"
|
|
default y
|
|
help
|
|
In this namespace tasks see different info provided with the
|
|
uname() system call
|
|
|
|
config TIME_NS
|
|
bool "TIME namespace"
|
|
depends on GENERIC_VDSO_TIME_NS
|
|
default y
|
|
help
|
|
In this namespace boottime and monotonic clocks can be set.
|
|
The time will keep going with the same pace.
|
|
|
|
config IPC_NS
|
|
bool "IPC namespace"
|
|
depends on (SYSVIPC || POSIX_MQUEUE)
|
|
default y
|
|
help
|
|
In this namespace tasks work with IPC ids which correspond to
|
|
different IPC objects in different namespaces.
|
|
|
|
config USER_NS
|
|
bool "User namespace"
|
|
default n
|
|
help
|
|
This allows containers, i.e. vservers, to use user namespaces
|
|
to provide different user info for different servers.
|
|
|
|
When user namespaces are enabled in the kernel it is
|
|
recommended that the MEMCG option also be enabled and that
|
|
user-space use the memory control groups to limit the amount
|
|
of memory a memory unprivileged users can use.
|
|
|
|
If unsure, say N.
|
|
|
|
config PID_NS
|
|
bool "PID Namespaces"
|
|
default y
|
|
help
|
|
Support process id namespaces. This allows having multiple
|
|
processes with the same pid as long as they are in different
|
|
pid namespaces. This is a building block of containers.
|
|
|
|
config NET_NS
|
|
bool "Network namespace"
|
|
depends on NET
|
|
default y
|
|
help
|
|
Allow user space to create what appear to be multiple instances
|
|
of the network stack.
|
|
|
|
endif # NAMESPACES
|
|
|
|
config CHECKPOINT_RESTORE
|
|
bool "Checkpoint/restore support"
|
|
select PROC_CHILDREN
|
|
default n
|
|
help
|
|
Enables additional kernel features in a sake of checkpoint/restore.
|
|
In particular it adds auxiliary prctl codes to setup process text,
|
|
data and heap segment sizes, and a few additional /proc filesystem
|
|
entries.
|
|
|
|
If unsure, say N here.
|
|
|
|
config SCHED_AUTOGROUP
|
|
bool "Automatic process group scheduling"
|
|
select CGROUPS
|
|
select CGROUP_SCHED
|
|
select FAIR_GROUP_SCHED
|
|
help
|
|
This option optimizes the scheduler for common desktop workloads by
|
|
automatically creating and populating task groups. This separation
|
|
of workloads isolates aggressive CPU burners (like build jobs) from
|
|
desktop applications. Task group autogeneration is currently based
|
|
upon task session.
|
|
|
|
config SYSFS_DEPRECATED
|
|
bool "Enable deprecated sysfs features to support old userspace tools"
|
|
depends on SYSFS
|
|
default n
|
|
help
|
|
This option adds code that switches the layout of the "block" class
|
|
devices, to not show up in /sys/class/block/, but only in
|
|
/sys/block/.
|
|
|
|
This switch is only active when the sysfs.deprecated=1 boot option is
|
|
passed or the SYSFS_DEPRECATED_V2 option is set.
|
|
|
|
This option allows new kernels to run on old distributions and tools,
|
|
which might get confused by /sys/class/block/. Since 2007/2008 all
|
|
major distributions and tools handle this just fine.
|
|
|
|
Recent distributions and userspace tools after 2009/2010 depend on
|
|
the existence of /sys/class/block/, and will not work with this
|
|
option enabled.
|
|
|
|
Only if you are using a new kernel on an old distribution, you might
|
|
need to say Y here.
|
|
|
|
config SYSFS_DEPRECATED_V2
|
|
bool "Enable deprecated sysfs features by default"
|
|
default n
|
|
depends on SYSFS
|
|
depends on SYSFS_DEPRECATED
|
|
help
|
|
Enable deprecated sysfs by default.
|
|
|
|
See the CONFIG_SYSFS_DEPRECATED option for more details about this
|
|
option.
|
|
|
|
Only if you are using a new kernel on an old distribution, you might
|
|
need to say Y here. Even then, odds are you would not need it
|
|
enabled, you can always pass the boot option if absolutely necessary.
|
|
|
|
config RELAY
|
|
bool "Kernel->user space relay support (formerly relayfs)"
|
|
select IRQ_WORK
|
|
help
|
|
This option enables support for relay interface support in
|
|
certain file systems (such as debugfs).
|
|
It is designed to provide an efficient mechanism for tools and
|
|
facilities to relay large amounts of data from kernel space to
|
|
user space.
|
|
|
|
If unsure, say N.
|
|
|
|
config BLK_DEV_INITRD
|
|
bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
|
|
help
|
|
The initial RAM filesystem is a ramfs which is loaded by the
|
|
boot loader (loadlin or lilo) and that is mounted as root
|
|
before the normal boot procedure. It is typically used to
|
|
load modules needed to mount the "real" root file system,
|
|
etc. See <file:Documentation/admin-guide/initrd.rst> for details.
|
|
|
|
If RAM disk support (BLK_DEV_RAM) is also included, this
|
|
also enables initial RAM disk (initrd) support and adds
|
|
15 Kbytes (more on some other architectures) to the kernel size.
|
|
|
|
If unsure say Y.
|
|
|
|
if BLK_DEV_INITRD
|
|
|
|
source "usr/Kconfig"
|
|
|
|
endif
|
|
|
|
config BOOT_CONFIG
|
|
bool "Boot config support"
|
|
select BLK_DEV_INITRD
|
|
help
|
|
Extra boot config allows system admin to pass a config file as
|
|
complemental extension of kernel cmdline when booting.
|
|
The boot config file must be attached at the end of initramfs
|
|
with checksum, size and magic word.
|
|
See <file:Documentation/admin-guide/bootconfig.rst> for details.
|
|
|
|
If unsure, say Y.
|
|
|
|
choice
|
|
prompt "Compiler optimization level"
|
|
default CC_OPTIMIZE_FOR_PERFORMANCE
|
|
|
|
config CC_OPTIMIZE_FOR_PERFORMANCE
|
|
bool "Optimize for performance (-O2)"
|
|
help
|
|
This is the default optimization level for the kernel, building
|
|
with the "-O2" compiler flag for best performance and most
|
|
helpful compile-time warnings.
|
|
|
|
config CC_OPTIMIZE_FOR_PERFORMANCE_O3
|
|
bool "Optimize more for performance (-O3)"
|
|
depends on ARC
|
|
help
|
|
Choosing this option will pass "-O3" to your compiler to optimize
|
|
the kernel yet more for performance.
|
|
|
|
config CC_OPTIMIZE_FOR_SIZE
|
|
bool "Optimize for size (-Os)"
|
|
help
|
|
Choosing this option will pass "-Os" to your compiler resulting
|
|
in a smaller kernel.
|
|
|
|
endchoice
|
|
|
|
config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
|
|
bool
|
|
help
|
|
This requires that the arch annotates or otherwise protects
|
|
its external entry points from being discarded. Linker scripts
|
|
must also merge .text.*, .data.*, and .bss.* correctly into
|
|
output sections. Care must be taken not to pull in unrelated
|
|
sections (e.g., '.text.init'). Typically '.' in section names
|
|
is used to distinguish them from label names / C identifiers.
|
|
|
|
config LD_DEAD_CODE_DATA_ELIMINATION
|
|
bool "Dead code and data elimination (EXPERIMENTAL)"
|
|
depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
|
|
depends on EXPERT
|
|
depends on $(cc-option,-ffunction-sections -fdata-sections)
|
|
depends on $(ld-option,--gc-sections)
|
|
help
|
|
Enable this if you want to do dead code and data elimination with
|
|
the linker by compiling with -ffunction-sections -fdata-sections,
|
|
and linking with --gc-sections.
|
|
|
|
This can reduce on disk and in-memory size of the kernel
|
|
code and static data, particularly for small configs and
|
|
on small systems. This has the possibility of introducing
|
|
silently broken kernel if the required annotations are not
|
|
present. This option is not well tested yet, so use at your
|
|
own risk.
|
|
|
|
config SYSCTL
|
|
bool
|
|
|
|
config HAVE_UID16
|
|
bool
|
|
|
|
config SYSCTL_EXCEPTION_TRACE
|
|
bool
|
|
help
|
|
Enable support for /proc/sys/debug/exception-trace.
|
|
|
|
config SYSCTL_ARCH_UNALIGN_NO_WARN
|
|
bool
|
|
help
|
|
Enable support for /proc/sys/kernel/ignore-unaligned-usertrap
|
|
Allows arch to define/use @no_unaligned_warning to possibly warn
|
|
about unaligned access emulation going on under the hood.
|
|
|
|
config SYSCTL_ARCH_UNALIGN_ALLOW
|
|
bool
|
|
help
|
|
Enable support for /proc/sys/kernel/unaligned-trap
|
|
Allows arches to define/use @unaligned_enabled to runtime toggle
|
|
the unaligned access emulation.
|
|
see arch/parisc/kernel/unaligned.c for reference
|
|
|
|
config HAVE_PCSPKR_PLATFORM
|
|
bool
|
|
|
|
# interpreter that classic socket filters depend on
|
|
config BPF
|
|
bool
|
|
|
|
menuconfig EXPERT
|
|
bool "Configure standard kernel features (expert users)"
|
|
# Unhide debug options, to make the on-by-default options visible
|
|
select DEBUG_KERNEL
|
|
help
|
|
This option allows certain base kernel options and settings
|
|
to be disabled or tweaked. This is for specialized
|
|
environments which can tolerate a "non-standard" kernel.
|
|
Only use this if you really know what you are doing.
|
|
|
|
config UID16
|
|
bool "Enable 16-bit UID system calls" if EXPERT
|
|
depends on HAVE_UID16 && MULTIUSER
|
|
default y
|
|
help
|
|
This enables the legacy 16-bit UID syscall wrappers.
|
|
|
|
config MULTIUSER
|
|
bool "Multiple users, groups and capabilities support" if EXPERT
|
|
default y
|
|
help
|
|
This option enables support for non-root users, groups and
|
|
capabilities.
|
|
|
|
If you say N here, all processes will run with UID 0, GID 0, and all
|
|
possible capabilities. Saying N here also compiles out support for
|
|
system calls related to UIDs, GIDs, and capabilities, such as setuid,
|
|
setgid, and capset.
|
|
|
|
If unsure, say Y here.
|
|
|
|
config SGETMASK_SYSCALL
|
|
bool "sgetmask/ssetmask syscalls support" if EXPERT
|
|
def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
|
|
help
|
|
sys_sgetmask and sys_ssetmask are obsolete system calls
|
|
no longer supported in libc but still enabled by default in some
|
|
architectures.
|
|
|
|
If unsure, leave the default option here.
|
|
|
|
config SYSFS_SYSCALL
|
|
bool "Sysfs syscall support" if EXPERT
|
|
default y
|
|
help
|
|
sys_sysfs is an obsolete system call no longer supported in libc.
|
|
Note that disabling this option is more secure but might break
|
|
compatibility with some systems.
|
|
|
|
If unsure say Y here.
|
|
|
|
config FHANDLE
|
|
bool "open by fhandle syscalls" if EXPERT
|
|
select EXPORTFS
|
|
default y
|
|
help
|
|
If you say Y here, a user level program will be able to map
|
|
file names to handle and then later use the handle for
|
|
different file system operations. This is useful in implementing
|
|
userspace file servers, which now track files using handles instead
|
|
of names. The handle would remain the same even if file names
|
|
get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
|
|
syscalls.
|
|
|
|
config POSIX_TIMERS
|
|
bool "Posix Clocks & timers" if EXPERT
|
|
default y
|
|
help
|
|
This includes native support for POSIX timers to the kernel.
|
|
Some embedded systems have no use for them and therefore they
|
|
can be configured out to reduce the size of the kernel image.
|
|
|
|
When this option is disabled, the following syscalls won't be
|
|
available: timer_create, timer_gettime: timer_getoverrun,
|
|
timer_settime, timer_delete, clock_adjtime, getitimer,
|
|
setitimer, alarm. Furthermore, the clock_settime, clock_gettime,
|
|
clock_getres and clock_nanosleep syscalls will be limited to
|
|
CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_BOOTTIME only.
|
|
|
|
If unsure say y.
|
|
|
|
config PRINTK
|
|
default y
|
|
bool "Enable support for printk" if EXPERT
|
|
select IRQ_WORK
|
|
help
|
|
This option enables normal printk support. Removing it
|
|
eliminates most of the message strings from the kernel image
|
|
and makes the kernel more or less silent. As this makes it
|
|
very difficult to diagnose system problems, saying N here is
|
|
strongly discouraged.
|
|
|
|
config PRINTK_NMI
|
|
def_bool y
|
|
depends on PRINTK
|
|
depends on HAVE_NMI
|
|
|
|
config BUG
|
|
bool "BUG() support" if EXPERT
|
|
default y
|
|
help
|
|
Disabling this option eliminates support for BUG and WARN, reducing
|
|
the size of your kernel image and potentially quietly ignoring
|
|
numerous fatal conditions. You should only consider disabling this
|
|
option for embedded systems with no facilities for reporting errors.
|
|
Just say Y.
|
|
|
|
config ELF_CORE
|
|
depends on COREDUMP
|
|
default y
|
|
bool "Enable ELF core dumps" if EXPERT
|
|
help
|
|
Enable support for generating core dumps. Disabling saves about 4k.
|
|
|
|
|
|
config PCSPKR_PLATFORM
|
|
bool "Enable PC-Speaker support" if EXPERT
|
|
depends on HAVE_PCSPKR_PLATFORM
|
|
select I8253_LOCK
|
|
default y
|
|
help
|
|
This option allows to disable the internal PC-Speaker
|
|
support, saving some memory.
|
|
|
|
config BASE_FULL
|
|
default y
|
|
bool "Enable full-sized data structures for core" if EXPERT
|
|
help
|
|
Disabling this option reduces the size of miscellaneous core
|
|
kernel data structures. This saves memory on small machines,
|
|
but may reduce performance.
|
|
|
|
config FUTEX
|
|
bool "Enable futex support" if EXPERT
|
|
default y
|
|
imply RT_MUTEXES
|
|
help
|
|
Disabling this option will cause the kernel to be built without
|
|
support for "fast userspace mutexes". The resulting kernel may not
|
|
run glibc-based applications correctly.
|
|
|
|
config FUTEX_PI
|
|
bool
|
|
depends on FUTEX && RT_MUTEXES
|
|
default y
|
|
|
|
config HAVE_FUTEX_CMPXCHG
|
|
bool
|
|
depends on FUTEX
|
|
help
|
|
Architectures should select this if futex_atomic_cmpxchg_inatomic()
|
|
is implemented and always working. This removes a couple of runtime
|
|
checks.
|
|
|
|
config EPOLL
|
|
bool "Enable eventpoll support" if EXPERT
|
|
default y
|
|
help
|
|
Disabling this option will cause the kernel to be built without
|
|
support for epoll family of system calls.
|
|
|
|
config SIGNALFD
|
|
bool "Enable signalfd() system call" if EXPERT
|
|
default y
|
|
help
|
|
Enable the signalfd() system call that allows to receive signals
|
|
on a file descriptor.
|
|
|
|
If unsure, say Y.
|
|
|
|
config TIMERFD
|
|
bool "Enable timerfd() system call" if EXPERT
|
|
default y
|
|
help
|
|
Enable the timerfd() system call that allows to receive timer
|
|
events on a file descriptor.
|
|
|
|
If unsure, say Y.
|
|
|
|
config EVENTFD
|
|
bool "Enable eventfd() system call" if EXPERT
|
|
default y
|
|
help
|
|
Enable the eventfd() system call that allows to receive both
|
|
kernel notification (ie. KAIO) or userspace notifications.
|
|
|
|
If unsure, say Y.
|
|
|
|
config SHMEM
|
|
bool "Use full shmem filesystem" if EXPERT
|
|
default y
|
|
depends on MMU
|
|
help
|
|
The shmem is an internal filesystem used to manage shared memory.
|
|
It is backed by swap and manages resource limits. It is also exported
|
|
to userspace as tmpfs if TMPFS is enabled. Disabling this
|
|
option replaces shmem and tmpfs with the much simpler ramfs code,
|
|
which may be appropriate on small systems without swap.
|
|
|
|
config AIO
|
|
bool "Enable AIO support" if EXPERT
|
|
default y
|
|
help
|
|
This option enables POSIX asynchronous I/O which may by used
|
|
by some high performance threaded applications. Disabling
|
|
this option saves about 7k.
|
|
|
|
config IO_URING
|
|
bool "Enable IO uring support" if EXPERT
|
|
select IO_WQ
|
|
default y
|
|
help
|
|
This option enables support for the io_uring interface, enabling
|
|
applications to submit and complete IO through submission and
|
|
completion rings that are shared between the kernel and application.
|
|
|
|
config ADVISE_SYSCALLS
|
|
bool "Enable madvise/fadvise syscalls" if EXPERT
|
|
default y
|
|
help
|
|
This option enables the madvise and fadvise syscalls, used by
|
|
applications to advise the kernel about their future memory or file
|
|
usage, improving performance. If building an embedded system where no
|
|
applications use these syscalls, you can disable this option to save
|
|
space.
|
|
|
|
config HAVE_ARCH_USERFAULTFD_WP
|
|
bool
|
|
help
|
|
Arch has userfaultfd write protection support
|
|
|
|
config MEMBARRIER
|
|
bool "Enable membarrier() system call" if EXPERT
|
|
default y
|
|
help
|
|
Enable the membarrier() system call that allows issuing memory
|
|
barriers across all running threads, which can be used to distribute
|
|
the cost of user-space memory barriers asymmetrically by transforming
|
|
pairs of memory barriers into pairs consisting of membarrier() and a
|
|
compiler barrier.
|
|
|
|
If unsure, say Y.
|
|
|
|
config KALLSYMS
|
|
bool "Load all symbols for debugging/ksymoops" if EXPERT
|
|
default y
|
|
help
|
|
Say Y here to let the kernel print out symbolic crash information and
|
|
symbolic stack backtraces. This increases the size of the kernel
|
|
somewhat, as all symbols have to be loaded into the kernel image.
|
|
|
|
config KALLSYMS_ALL
|
|
bool "Include all symbols in kallsyms"
|
|
depends on DEBUG_KERNEL && KALLSYMS
|
|
help
|
|
Normally kallsyms only contains the symbols of functions for nicer
|
|
OOPS messages and backtraces (i.e., symbols from the text and inittext
|
|
sections). This is sufficient for most cases. And only in very rare
|
|
cases (e.g., when a debugger is used) all symbols are required (e.g.,
|
|
names of variables from the data sections, etc).
|
|
|
|
This option makes sure that all symbols are loaded into the kernel
|
|
image (i.e., symbols from all sections) in cost of increased kernel
|
|
size (depending on the kernel configuration, it may be 300KiB or
|
|
something like this).
|
|
|
|
Say N unless you really need all symbols.
|
|
|
|
config KALLSYMS_ABSOLUTE_PERCPU
|
|
bool
|
|
depends on KALLSYMS
|
|
default X86_64 && SMP
|
|
|
|
config KALLSYMS_BASE_RELATIVE
|
|
bool
|
|
depends on KALLSYMS
|
|
default !IA64
|
|
help
|
|
Instead of emitting them as absolute values in the native word size,
|
|
emit the symbol references in the kallsyms table as 32-bit entries,
|
|
each containing a relative value in the range [base, base + U32_MAX]
|
|
or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either
|
|
an absolute value in the range [0, S32_MAX] or a relative value in the
|
|
range [base, base + S32_MAX], where base is the lowest relative symbol
|
|
address encountered in the image.
|
|
|
|
On 64-bit builds, this reduces the size of the address table by 50%,
|
|
but more importantly, it results in entries whose values are build
|
|
time constants, and no relocation pass is required at runtime to fix
|
|
up the entries based on the runtime load address of the kernel.
|
|
|
|
# end of the "standard kernel features (expert users)" menu
|
|
|
|
# syscall, maps, verifier
|
|
|
|
config BPF_LSM
|
|
bool "LSM Instrumentation with BPF"
|
|
depends on BPF_EVENTS
|
|
depends on BPF_SYSCALL
|
|
depends on SECURITY
|
|
depends on BPF_JIT
|
|
help
|
|
Enables instrumentation of the security hooks with eBPF programs for
|
|
implementing dynamic MAC and Audit Policies.
|
|
|
|
If you are unsure how to answer this question, answer N.
|
|
|
|
config BPF_SYSCALL
|
|
bool "Enable bpf() system call"
|
|
select BPF
|
|
select IRQ_WORK
|
|
default n
|
|
help
|
|
Enable the bpf() system call that allows to manipulate eBPF
|
|
programs and maps via file descriptors.
|
|
|
|
config ARCH_WANT_DEFAULT_BPF_JIT
|
|
bool
|
|
|
|
config BPF_JIT_ALWAYS_ON
|
|
bool "Permanently enable BPF JIT and remove BPF interpreter"
|
|
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
|
|
help
|
|
Enables BPF JIT and removes BPF interpreter to avoid
|
|
speculative execution of BPF instructions by the interpreter
|
|
|
|
config BPF_JIT_DEFAULT_ON
|
|
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
|
|
depends on HAVE_EBPF_JIT && BPF_JIT
|
|
|
|
config USERFAULTFD
|
|
bool "Enable userfaultfd() system call"
|
|
depends on MMU
|
|
help
|
|
Enable the userfaultfd() system call that allows to intercept and
|
|
handle page faults in userland.
|
|
|
|
config ARCH_HAS_MEMBARRIER_CALLBACKS
|
|
bool
|
|
|
|
config ARCH_HAS_MEMBARRIER_SYNC_CORE
|
|
bool
|
|
|
|
config RSEQ
|
|
bool "Enable rseq() system call" if EXPERT
|
|
default y
|
|
depends on HAVE_RSEQ
|
|
select MEMBARRIER
|
|
help
|
|
Enable the restartable sequences system call. It provides a
|
|
user-space cache for the current CPU number value, which
|
|
speeds up getting the current CPU number from user-space,
|
|
as well as an ABI to speed up user-space operations on
|
|
per-CPU data.
|
|
|
|
If unsure, say Y.
|
|
|
|
config DEBUG_RSEQ
|
|
default n
|
|
bool "Enabled debugging of rseq() system call" if EXPERT
|
|
depends on RSEQ && DEBUG_KERNEL
|
|
help
|
|
Enable extra debugging checks for the rseq system call.
|
|
|
|
If unsure, say N.
|
|
|
|
config EMBEDDED
|
|
bool "Embedded system"
|
|
option allnoconfig_y
|
|
select EXPERT
|
|
help
|
|
This option should be enabled if compiling the kernel for
|
|
an embedded system so certain expert options are available
|
|
for configuration.
|
|
|
|
config HAVE_PERF_EVENTS
|
|
bool
|
|
help
|
|
See tools/perf/design.txt for details.
|
|
|
|
config PERF_USE_VMALLOC
|
|
bool
|
|
help
|
|
See tools/perf/design.txt for details
|
|
|
|
config PC104
|
|
bool "PC/104 support" if EXPERT
|
|
help
|
|
Expose PC/104 form factor device drivers and options available for
|
|
selection and configuration. Enable this option if your target
|
|
machine has a PC/104 bus.
|
|
|
|
menu "Kernel Performance Events And Counters"
|
|
|
|
config PERF_EVENTS
|
|
bool "Kernel performance events and counters"
|
|
default y if PROFILING
|
|
depends on HAVE_PERF_EVENTS
|
|
select IRQ_WORK
|
|
select SRCU
|
|
help
|
|
Enable kernel support for various performance events provided
|
|
by software and hardware.
|
|
|
|
Software events are supported either built-in or via the
|
|
use of generic tracepoints.
|
|
|
|
Most modern CPUs support performance events via performance
|
|
counter registers. These registers count the number of certain
|
|
types of hw events: such as instructions executed, cachemisses
|
|
suffered, or branches mis-predicted - without slowing down the
|
|
kernel or applications. These registers can also trigger interrupts
|
|
when a threshold number of events have passed - and can thus be
|
|
used to profile the code that runs on that CPU.
|
|
|
|
The Linux Performance Event subsystem provides an abstraction of
|
|
these software and hardware event capabilities, available via a
|
|
system call and used by the "perf" utility in tools/perf/. It
|
|
provides per task and per CPU counters, and it provides event
|
|
capabilities on top of those.
|
|
|
|
Say Y if unsure.
|
|
|
|
config DEBUG_PERF_USE_VMALLOC
|
|
default n
|
|
bool "Debug: use vmalloc to back perf mmap() buffers"
|
|
depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
|
|
select PERF_USE_VMALLOC
|
|
help
|
|
Use vmalloc memory to back perf mmap() buffers.
|
|
|
|
Mostly useful for debugging the vmalloc code on platforms
|
|
that don't require it.
|
|
|
|
Say N if unsure.
|
|
|
|
endmenu
|
|
|
|
config VM_EVENT_COUNTERS
|
|
default y
|
|
bool "Enable VM event counters for /proc/vmstat" if EXPERT
|
|
help
|
|
VM event counters are needed for event counts to be shown.
|
|
This option allows the disabling of the VM event counters
|
|
on EXPERT systems. /proc/vmstat will only show page counts
|
|
if VM event counters are disabled.
|
|
|
|
config SLUB_DEBUG
|
|
default y
|
|
bool "Enable SLUB debugging support" if EXPERT
|
|
depends on SLUB && SYSFS
|
|
help
|
|
SLUB has extensive debug support features. Disabling these can
|
|
result in significant savings in code size. This also disables
|
|
SLUB sysfs support. /sys/slab will not exist and there will be
|
|
no support for cache validation etc.
|
|
|
|
config SLUB_MEMCG_SYSFS_ON
|
|
default n
|
|
bool "Enable memcg SLUB sysfs support by default" if EXPERT
|
|
depends on SLUB && SYSFS && MEMCG
|
|
help
|
|
SLUB creates a directory under /sys/kernel/slab for each
|
|
allocation cache to host info and debug files. If memory
|
|
cgroup is enabled, each cache can have per memory cgroup
|
|
caches. SLUB can create the same sysfs directories for these
|
|
caches under /sys/kernel/slab/CACHE/cgroup but it can lead
|
|
to a very high number of debug files being created. This is
|
|
controlled by slub_memcg_sysfs boot parameter and this
|
|
config option determines the parameter's default value.
|
|
|
|
config COMPAT_BRK
|
|
bool "Disable heap randomization"
|
|
default y
|
|
help
|
|
Randomizing heap placement makes heap exploits harder, but it
|
|
also breaks ancient binaries (including anything libc5 based).
|
|
This option changes the bootup default to heap randomization
|
|
disabled, and can be overridden at runtime by setting
|
|
/proc/sys/kernel/randomize_va_space to 2.
|
|
|
|
On non-ancient distros (post-2000 ones) N is usually a safe choice.
|
|
|
|
choice
|
|
prompt "Choose SLAB allocator"
|
|
default SLUB
|
|
help
|
|
This option allows to select a slab allocator.
|
|
|
|
config SLAB
|
|
bool "SLAB"
|
|
select HAVE_HARDENED_USERCOPY_ALLOCATOR
|
|
help
|
|
The regular slab allocator that is established and known to work
|
|
well in all environments. It organizes cache hot objects in
|
|
per cpu and per node queues.
|
|
|
|
config SLUB
|
|
bool "SLUB (Unqueued Allocator)"
|
|
select HAVE_HARDENED_USERCOPY_ALLOCATOR
|
|
help
|
|
SLUB is a slab allocator that minimizes cache line usage
|
|
instead of managing queues of cached objects (SLAB approach).
|
|
Per cpu caching is realized using slabs of objects instead
|
|
of queues of objects. SLUB can use memory efficiently
|
|
and has enhanced diagnostics. SLUB is the default choice for
|
|
a slab allocator.
|
|
|
|
config SLOB
|
|
depends on EXPERT
|
|
bool "SLOB (Simple Allocator)"
|
|
help
|
|
SLOB replaces the stock allocator with a drastically simpler
|
|
allocator. SLOB is generally more space efficient but
|
|
does not perform as well on large systems.
|
|
|
|
endchoice
|
|
|
|
config SLAB_MERGE_DEFAULT
|
|
bool "Allow slab caches to be merged"
|
|
default y
|
|
help
|
|
For reduced kernel memory fragmentation, slab caches can be
|
|
merged when they share the same size and other characteristics.
|
|
This carries a risk of kernel heap overflows being able to
|
|
overwrite objects from merged caches (and more easily control
|
|
cache layout), which makes such heap attacks easier to exploit
|
|
by attackers. By keeping caches unmerged, these kinds of exploits
|
|
can usually only damage objects in the same cache. To disable
|
|
merging at runtime, "slab_nomerge" can be passed on the kernel
|
|
command line.
|
|
|
|
config SLAB_FREELIST_RANDOM
|
|
bool "Randomize slab freelist"
|
|
depends on SLAB || SLUB
|
|
help
|
|
Randomizes the freelist order used on creating new pages. This
|
|
security feature reduces the predictability of the kernel slab
|
|
allocator against heap overflows.
|
|
|
|
config SLAB_FREELIST_HARDENED
|
|
bool "Harden slab freelist metadata"
|
|
depends on SLAB || SLUB
|
|
help
|
|
Many kernel heap attacks try to target slab cache metadata and
|
|
other infrastructure. This options makes minor performance
|
|
sacrifices to harden the kernel slab allocator against common
|
|
freelist exploit methods. Some slab implementations have more
|
|
sanity-checking than others. This option is most effective with
|
|
CONFIG_SLUB.
|
|
|
|
config SHUFFLE_PAGE_ALLOCATOR
|
|
bool "Page allocator randomization"
|
|
default SLAB_FREELIST_RANDOM && ACPI_NUMA
|
|
help
|
|
Randomization of the page allocator improves the average
|
|
utilization of a direct-mapped memory-side-cache. See section
|
|
5.2.27 Heterogeneous Memory Attribute Table (HMAT) in the ACPI
|
|
6.2a specification for an example of how a platform advertises
|
|
the presence of a memory-side-cache. There are also incidental
|
|
security benefits as it reduces the predictability of page
|
|
allocations to compliment SLAB_FREELIST_RANDOM, but the
|
|
default granularity of shuffling on the "MAX_ORDER - 1" i.e,
|
|
10th order of pages is selected based on cache utilization
|
|
benefits on x86.
|
|
|
|
While the randomization improves cache utilization it may
|
|
negatively impact workloads on platforms without a cache. For
|
|
this reason, by default, the randomization is enabled only
|
|
after runtime detection of a direct-mapped memory-side-cache.
|
|
Otherwise, the randomization may be force enabled with the
|
|
'page_alloc.shuffle' kernel command line parameter.
|
|
|
|
Say Y if unsure.
|
|
|
|
config SLUB_CPU_PARTIAL
|
|
default y
|
|
depends on SLUB && SMP
|
|
bool "SLUB per cpu partial cache"
|
|
help
|
|
Per cpu partial caches accelerate objects allocation and freeing
|
|
that is local to a processor at the price of more indeterminism
|
|
in the latency of the free. On overflow these caches will be cleared
|
|
which requires the taking of locks that may cause latency spikes.
|
|
Typically one would choose no for a realtime system.
|
|
|
|
config MMAP_ALLOW_UNINITIALIZED
|
|
bool "Allow mmapped anonymous memory to be uninitialized"
|
|
depends on EXPERT && !MMU
|
|
default n
|
|
help
|
|
Normally, and according to the Linux spec, anonymous memory obtained
|
|
from mmap() has its contents cleared before it is passed to
|
|
userspace. Enabling this config option allows you to request that
|
|
mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
|
|
providing a huge performance boost. If this option is not enabled,
|
|
then the flag will be ignored.
|
|
|
|
This is taken advantage of by uClibc's malloc(), and also by
|
|
ELF-FDPIC binfmt's brk and stack allocator.
|
|
|
|
Because of the obvious security issues, this option should only be
|
|
enabled on embedded devices where you control what is run in
|
|
userspace. Since that isn't generally a problem on no-MMU systems,
|
|
it is normally safe to say Y here.
|
|
|
|
See Documentation/mm/nommu-mmap.rst for more information.
|
|
|
|
config SYSTEM_DATA_VERIFICATION
|
|
def_bool n
|
|
select SYSTEM_TRUSTED_KEYRING
|
|
select KEYS
|
|
select CRYPTO
|
|
select CRYPTO_RSA
|
|
select ASYMMETRIC_KEY_TYPE
|
|
select ASYMMETRIC_PUBLIC_KEY_SUBTYPE
|
|
select ASN1
|
|
select OID_REGISTRY
|
|
select X509_CERTIFICATE_PARSER
|
|
select PKCS7_MESSAGE_PARSER
|
|
help
|
|
Provide PKCS#7 message verification using the contents of the system
|
|
trusted keyring to provide public keys. This then can be used for
|
|
module verification, kexec image verification and firmware blob
|
|
verification.
|
|
|
|
config PROFILING
|
|
bool "Profiling support"
|
|
help
|
|
Say Y here to enable the extended profiling support mechanisms used
|
|
by profilers such as OProfile.
|
|
|
|
#
|
|
# Place an empty function call at each tracepoint site. Can be
|
|
# dynamically changed for a probe function.
|
|
#
|
|
config TRACEPOINTS
|
|
bool
|
|
|
|
endmenu # General setup
|
|
|
|
source "arch/Kconfig"
|
|
|
|
config RT_MUTEXES
|
|
bool
|
|
|
|
config BASE_SMALL
|
|
int
|
|
default 0 if BASE_FULL
|
|
default 1 if !BASE_FULL
|
|
|
|
config MODULE_SIG_FORMAT
|
|
def_bool n
|
|
select SYSTEM_DATA_VERIFICATION
|
|
|
|
menuconfig MODULES
|
|
bool "Enable loadable module support"
|
|
option modules
|
|
help
|
|
Kernel modules are small pieces of compiled code which can
|
|
be inserted in the running kernel, rather than being
|
|
permanently built into the kernel. You use the "modprobe"
|
|
tool to add (and sometimes remove) them. If you say Y here,
|
|
many parts of the kernel can be built as modules (by
|
|
answering M instead of Y where indicated): this is most
|
|
useful for infrequently used options which are not required
|
|
for booting. For more information, see the man pages for
|
|
modprobe, lsmod, modinfo, insmod and rmmod.
|
|
|
|
If you say Y here, you will need to run "make
|
|
modules_install" to put the modules under /lib/modules/
|
|
where modprobe can find them (you may need to be root to do
|
|
this).
|
|
|
|
If unsure, say Y.
|
|
|
|
if MODULES
|
|
|
|
config MODULE_FORCE_LOAD
|
|
bool "Forced module loading"
|
|
default n
|
|
help
|
|
Allow loading of modules without version information (ie. modprobe
|
|
--force). Forced module loading sets the 'F' (forced) taint flag and
|
|
is usually a really bad idea.
|
|
|
|
config MODULE_UNLOAD
|
|
bool "Module unloading"
|
|
help
|
|
Without this option you will not be able to unload any
|
|
modules (note that some modules may not be unloadable
|
|
anyway), which makes your kernel smaller, faster
|
|
and simpler. If unsure, say Y.
|
|
|
|
config MODULE_FORCE_UNLOAD
|
|
bool "Forced module unloading"
|
|
depends on MODULE_UNLOAD
|
|
help
|
|
This option allows you to force a module to unload, even if the
|
|
kernel believes it is unsafe: the kernel will remove the module
|
|
without waiting for anyone to stop using it (using the -f option to
|
|
rmmod). This is mainly for kernel developers and desperate users.
|
|
If unsure, say N.
|
|
|
|
config MODVERSIONS
|
|
bool "Module versioning support"
|
|
help
|
|
Usually, you have to use modules compiled with your kernel.
|
|
Saying Y here makes it sometimes possible to use modules
|
|
compiled for different kernels, by adding enough information
|
|
to the modules to (hopefully) spot any changes which would
|
|
make them incompatible with the kernel you are running. If
|
|
unsure, say N.
|
|
|
|
config ASM_MODVERSIONS
|
|
bool
|
|
default HAVE_ASM_MODVERSIONS && MODVERSIONS
|
|
help
|
|
This enables module versioning for exported symbols also from
|
|
assembly. This can be enabled only when the target architecture
|
|
supports it.
|
|
|
|
config MODULE_REL_CRCS
|
|
bool
|
|
depends on MODVERSIONS
|
|
|
|
config MODULE_SRCVERSION_ALL
|
|
bool "Source checksum for all modules"
|
|
help
|
|
Modules which contain a MODULE_VERSION get an extra "srcversion"
|
|
field inserted into their modinfo section, which contains a
|
|
sum of the source files which made it. This helps maintainers
|
|
see exactly which source was used to build a module (since
|
|
others sometimes change the module source without updating
|
|
the version). With this option, such a "srcversion" field
|
|
will be created for all modules. If unsure, say N.
|
|
|
|
config MODULE_SIG
|
|
bool "Module signature verification"
|
|
select MODULE_SIG_FORMAT
|
|
help
|
|
Check modules for valid signatures upon load: the signature
|
|
is simply appended to the module. For more information see
|
|
<file:Documentation/admin-guide/module-signing.rst>.
|
|
|
|
Note that this option adds the OpenSSL development packages as a
|
|
kernel build dependency so that the signing tool can use its crypto
|
|
library.
|
|
|
|
You should enable this option if you wish to use either
|
|
CONFIG_SECURITY_LOCKDOWN_LSM or lockdown functionality imposed via
|
|
another LSM - otherwise unsigned modules will be loadable regardless
|
|
of the lockdown policy.
|
|
|
|
!!!WARNING!!! If you enable this option, you MUST make sure that the
|
|
module DOES NOT get stripped after being signed. This includes the
|
|
debuginfo strip done by some packagers (such as rpmbuild) and
|
|
inclusion into an initramfs that wants the module size reduced.
|
|
|
|
config MODULE_SIG_FORCE
|
|
bool "Require modules to be validly signed"
|
|
depends on MODULE_SIG
|
|
help
|
|
Reject unsigned modules or signed modules for which we don't have a
|
|
key. Without this, such modules will simply taint the kernel.
|
|
|
|
config MODULE_SIG_ALL
|
|
bool "Automatically sign all modules"
|
|
default y
|
|
depends on MODULE_SIG
|
|
help
|
|
Sign all modules during make modules_install. Without this option,
|
|
modules must be signed manually, using the scripts/sign-file tool.
|
|
|
|
comment "Do not forget to sign required modules with scripts/sign-file"
|
|
depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL
|
|
|
|
choice
|
|
prompt "Which hash algorithm should modules be signed with?"
|
|
depends on MODULE_SIG
|
|
help
|
|
This determines which sort of hashing algorithm will be used during
|
|
signature generation. This algorithm _must_ be built into the kernel
|
|
directly so that signature verification can take place. It is not
|
|
possible to load a signed module containing the algorithm to check
|
|
the signature on that module.
|
|
|
|
config MODULE_SIG_SHA1
|
|
bool "Sign modules with SHA-1"
|
|
select CRYPTO_SHA1
|
|
|
|
config MODULE_SIG_SHA224
|
|
bool "Sign modules with SHA-224"
|
|
select CRYPTO_SHA256
|
|
|
|
config MODULE_SIG_SHA256
|
|
bool "Sign modules with SHA-256"
|
|
select CRYPTO_SHA256
|
|
|
|
config MODULE_SIG_SHA384
|
|
bool "Sign modules with SHA-384"
|
|
select CRYPTO_SHA512
|
|
|
|
config MODULE_SIG_SHA512
|
|
bool "Sign modules with SHA-512"
|
|
select CRYPTO_SHA512
|
|
|
|
endchoice
|
|
|
|
config MODULE_SIG_HASH
|
|
string
|
|
depends on MODULE_SIG
|
|
default "sha1" if MODULE_SIG_SHA1
|
|
default "sha224" if MODULE_SIG_SHA224
|
|
default "sha256" if MODULE_SIG_SHA256
|
|
default "sha384" if MODULE_SIG_SHA384
|
|
default "sha512" if MODULE_SIG_SHA512
|
|
|
|
config MODULE_COMPRESS
|
|
bool "Compress modules on installation"
|
|
help
|
|
|
|
Compresses kernel modules when 'make modules_install' is run; gzip or
|
|
xz depending on "Compression algorithm" below.
|
|
|
|
module-init-tools MAY support gzip, and kmod MAY support gzip and xz.
|
|
|
|
Out-of-tree kernel modules installed using Kbuild will also be
|
|
compressed upon installation.
|
|
|
|
Note: for modules inside an initrd or initramfs, it's more efficient
|
|
to compress the whole initrd or initramfs instead.
|
|
|
|
Note: This is fully compatible with signed modules.
|
|
|
|
If in doubt, say N.
|
|
|
|
choice
|
|
prompt "Compression algorithm"
|
|
depends on MODULE_COMPRESS
|
|
default MODULE_COMPRESS_GZIP
|
|
help
|
|
This determines which sort of compression will be used during
|
|
'make modules_install'.
|
|
|
|
GZIP (default) and XZ are supported.
|
|
|
|
config MODULE_COMPRESS_GZIP
|
|
bool "GZIP"
|
|
|
|
config MODULE_COMPRESS_XZ
|
|
bool "XZ"
|
|
|
|
endchoice
|
|
|
|
config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
|
|
bool "Allow loading of modules with missing namespace imports"
|
|
help
|
|
Symbols exported with EXPORT_SYMBOL_NS*() are considered exported in
|
|
a namespace. A module that makes use of a symbol exported with such a
|
|
namespace is required to import the namespace via MODULE_IMPORT_NS().
|
|
There is no technical reason to enforce correct namespace imports,
|
|
but it creates consistency between symbols defining namespaces and
|
|
users importing namespaces they make use of. This option relaxes this
|
|
requirement and lifts the enforcement when loading a module.
|
|
|
|
If unsure, say N.
|
|
|
|
config UNUSED_SYMBOLS
|
|
bool "Enable unused/obsolete exported symbols"
|
|
default y if X86
|
|
help
|
|
Unused but exported symbols make the kernel needlessly bigger. For
|
|
that reason most of these unused exports will soon be removed. This
|
|
option is provided temporarily to provide a transition period in case
|
|
some external kernel module needs one of these symbols anyway. If you
|
|
encounter such a case in your module, consider if you are actually
|
|
using the right API. (rationale: since nobody in the kernel is using
|
|
this in a module, there is a pretty good chance it's actually the
|
|
wrong interface to use). If you really need the symbol, please send a
|
|
mail to the linux kernel mailing list mentioning the symbol and why
|
|
you really need it, and what the merge plan to the mainline kernel for
|
|
your module is.
|
|
|
|
config TRIM_UNUSED_KSYMS
|
|
bool "Trim unused exported kernel symbols"
|
|
depends on !UNUSED_SYMBOLS
|
|
help
|
|
The kernel and some modules make many symbols available for
|
|
other modules to use via EXPORT_SYMBOL() and variants. Depending
|
|
on the set of modules being selected in your kernel configuration,
|
|
many of those exported symbols might never be used.
|
|
|
|
This option allows for unused exported symbols to be dropped from
|
|
the build. In turn, this provides the compiler more opportunities
|
|
(especially when using LTO) for optimizing the code and reducing
|
|
binary size. This might have some security advantages as well.
|
|
|
|
If unsure, or if you need to build out-of-tree modules, say N.
|
|
|
|
config UNUSED_KSYMS_WHITELIST
|
|
string "Whitelist of symbols to keep in ksymtab"
|
|
depends on TRIM_UNUSED_KSYMS
|
|
help
|
|
By default, all unused exported symbols will be un-exported from the
|
|
build when TRIM_UNUSED_KSYMS is selected.
|
|
|
|
UNUSED_KSYMS_WHITELIST allows to whitelist symbols that must be kept
|
|
exported at all times, even in absence of in-tree users. The value to
|
|
set here is the path to a text file containing the list of symbols,
|
|
one per line. The path can be absolute, or relative to the kernel
|
|
source tree.
|
|
|
|
endif # MODULES
|
|
|
|
config MODULES_TREE_LOOKUP
|
|
def_bool y
|
|
depends on PERF_EVENTS || TRACING
|
|
|
|
config INIT_ALL_POSSIBLE
|
|
bool
|
|
help
|
|
Back when each arch used to define their own cpu_online_mask and
|
|
cpu_possible_mask, some of them chose to initialize cpu_possible_mask
|
|
with all 1s, and others with all 0s. When they were centralised,
|
|
it was better to provide this option than to break all the archs
|
|
and have several arch maintainers pursuing me down dark alleys.
|
|
|
|
source "block/Kconfig"
|
|
|
|
config PREEMPT_NOTIFIERS
|
|
bool
|
|
|
|
config PADATA
|
|
depends on SMP
|
|
bool
|
|
|
|
config ASN1
|
|
tristate
|
|
help
|
|
Build a simple ASN.1 grammar compiler that produces a bytecode output
|
|
that can be interpreted by the ASN.1 stream decoder and used to
|
|
inform it as to what tags are to be expected in a stream and what
|
|
functions to call on what tags.
|
|
|
|
source "kernel/Kconfig.locks"
|
|
|
|
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
|
|
bool
|
|
|
|
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
|
|
bool
|
|
|
|
# It may be useful for an architecture to override the definitions of the
|
|
# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in <linux/syscalls.h>
|
|
# and the COMPAT_ variants in <linux/compat.h>, in particular to use a
|
|
# different calling convention for syscalls. They can also override the
|
|
# macros for not-implemented syscalls in kernel/sys_ni.c and
|
|
# kernel/time/posix-stubs.c. All these overrides need to be available in
|
|
# <asm/syscall_wrapper.h>.
|
|
config ARCH_HAS_SYSCALL_WRAPPER
|
|
def_bool n
|