mirror of
https://github.com/systemd/systemd.git
synced 2024-11-27 20:23:36 +08:00
execute: use prctl(PR_SET_MDWE) for MemoryDenyWriteExecute=yes
On some ARM platforms, the dynamic linker may use the PROT_BTI memory protection flag with `mprotect(..., PROT_BTI | PROT_EXEC)` to enable additional memory protection for executable pages. But `MemoryDenyWriteExecute=yes` blocks this, because its seccomp filter denies all `mprotect(..., x | PROT_EXEC)` calls. The newly preferred method is to use prctl(PR_SET_MDWE) on supported kernels. The in-kernel implementation can then allow PROT_BTI as necessary, without weakening MDWE. The in-kernel version may also be extended to more sophisticated protections in the future.
This commit is contained in:
parent
cbcdc582f3
commit
7a114ed4b3
@ -2080,9 +2080,11 @@ RestrictNamespaces=~cgroup net</programlisting>
|
||||
|
||||
<listitem><para>Takes a boolean argument. If set, attempts to create memory mappings that are writable and
|
||||
executable at the same time, or to change existing memory mappings to become executable, or mapping shared
|
||||
memory segments as executable, are prohibited. Specifically, a system call filter is added that rejects
|
||||
<citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls with both
|
||||
<constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
|
||||
memory segments as executable, are prohibited. Specifically, a system call filter is added (or
|
||||
preferably, an equivalent kernel check is enabled with
|
||||
<citerefentry><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry>) that
|
||||
rejects <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
|
||||
system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
|
||||
<citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry> or
|
||||
<citerefentry><refentrytitle>pkey_mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls
|
||||
with <constant>PROT_EXEC</constant> set and
|
||||
|
@ -12,3 +12,11 @@
|
||||
#define PR_CAP_AMBIENT_LOWER 3
|
||||
#define PR_CAP_AMBIENT_CLEAR_ALL 4
|
||||
#endif
|
||||
|
||||
/* Fallback definitions of the PR_SET_MDWE prctl() option and its PR_MDWE_REFUSE_EXEC_GAIN flag.
 * Each one is defined here only when the system headers have not already provided it.
 * Reference: b507808ebce23561d4ff8c2aa1fb949fe402bc61 (6.3) -- presumably the upstream kernel
 * commit and release that introduced the option; TODO confirm. */
|
||||
#ifndef PR_SET_MDWE
|
||||
#define PR_SET_MDWE 65
|
||||
#endif
|
||||
#ifndef PR_MDWE_REFUSE_EXEC_GAIN
|
||||
#define PR_MDWE_REFUSE_EXEC_GAIN 1
|
||||
#endif
|
||||
|
@ -73,6 +73,7 @@
|
||||
#include "memory-util.h"
|
||||
#include "missing_fs.h"
|
||||
#include "missing_ioprio.h"
|
||||
#include "missing_prctl.h"
|
||||
#include "mkdir-label.h"
|
||||
#include "mount-util.h"
|
||||
#include "mountpoint-util.h"
|
||||
@ -1571,12 +1572,25 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
|
||||
}
|
||||
|
||||
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
assert(c);
|
||||
|
||||
if (!c->memory_deny_write_execute)
|
||||
return 0;
|
||||
|
||||
/* use prctl() if kernel supports it (6.3) */
|
||||
r = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0);
|
||||
if (r == 0) {
|
||||
log_unit_debug(u, "Enabled MemoryDenyWriteExecute= with PR_SET_MDWE");
|
||||
return 0;
|
||||
}
|
||||
if (r < 0 && errno != EINVAL)
|
||||
return log_unit_debug_errno(u, errno, "Failed to enable MemoryDenyWriteExecute= with PR_SET_MDWE: %m");
|
||||
/* else use seccomp */
|
||||
log_unit_debug(u, "Kernel doesn't support PR_SET_MDWE: falling back to seccomp");
|
||||
|
||||
if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
|
||||
return 0;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user