mirror of
https://github.com/systemd/systemd.git
synced 2024-12-02 23:03:50 +08:00
Merge pull request #9762 from poettering/nspawn-oci
OCI runtime support for nspawn
This commit is contained in:
commit
d0b6a10c00
@ -238,6 +238,15 @@
|
||||
together with <option>--directory=</option>, <option>--template=</option>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--oci-bundle=</option></term>
|
||||
|
||||
<listitem><para>Takes the path to an OCI runtime bundle to invoke, as specified in the <ulink
|
||||
url="https://github.com/opencontainers/runtime-spec/blob/master/spec.md">OCI Runtime Specification</ulink>. In
|
||||
this case no <filename>.nspawn</filename> file is loaded, and the root directory and various settings are read
|
||||
from the OCI runtime JSON data (but data passed on the command line takes precedence).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--root-hash=</option></term>
|
||||
|
||||
@ -952,6 +961,16 @@
|
||||
make them read-only, using <option>--bind-ro=</option>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--inaccessible=</option></term>
|
||||
|
||||
<listitem><para>Make the specified path inaccessible in the container. This over-mounts the specified path
|
||||
(which must exist in the container) with a file node of the same type that is empty and has the most
|
||||
restrictive access mode supported. This is an effective way to mask files, directories and other file system
|
||||
objects from the container payload. This option may be used more than once, in which case all specified paths are
|
||||
masked.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--tmpfs=</option></term>
|
||||
|
||||
@ -1084,6 +1103,42 @@
|
||||
same as the one reported on the host.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--console=</option><replaceable>MODE</replaceable></term>
|
||||
|
||||
<listitem><para>Configures how to set up standard input, output and error output for the container payload, as
|
||||
well as the <filename>/dev/console</filename> device for the container. Takes one of
|
||||
<option>interactive</option>, <option>read-only</option>, <option>passive</option> or <option>pipe</option>. If
|
||||
<option>interactive</option> a pseudo-TTY is allocated and made available as <filename>/dev/console</filename>
|
||||
in the container. It is then bi-directionally connected to the standard input and output passed to
|
||||
<command>systemd-nspawn</command>. <option>read-only</option> is similar but only the output of the container
|
||||
is propagated and no input from the caller is read. In <option>passive</option> mode a pseudo TTY is allocated,
|
||||
but it is not connected anywhere. Finally, in <option>pipe</option> mode no pseudo TTY is allocated, but the
|
||||
passed standard input, output and error output file descriptors are passed on — as they are — to the container
|
||||
payload. In this mode <filename>/dev/console</filename> will not exist in the container. Note that in this mode
|
||||
the container payload generally cannot be a full init system as init systems tend to require
|
||||
<filename>/dev/console</filename> to be available. On the other hand, in this mode container invocations can be
|
||||
used within shell pipelines. This is because intermediary pseudo TTYs do not permit independent bidirectional
|
||||
propagation of the end-of-file (EOF) condition, which is necessary for shell pipelines to work
|
||||
correctly.</para>
|
||||
|
||||
<para>Note that the <option>pipe</option> mode should be used carefully, as passing arbitrary file descriptors
|
||||
to less trusted container payloads might open up unwanted interfaces for access by the container payload. For
|
||||
example, if a passed file descriptor refers to a TTY of some form, APIs such as <constant>TIOCSTI</constant>
|
||||
may be used to synthesize input that might be used for escaping the container. Hence <option>pipe</option> mode
|
||||
should only be used if the payload is sufficiently trusted or when the standard input/output/error output file
|
||||
descriptors are known safe, for example pipes. Defaults to <option>interactive</option> if
|
||||
<command>systemd-nspawn</command> is invoked from a terminal, and <option>read-only</option>
|
||||
otherwise.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--pipe</option></term>
|
||||
<term><option>-P</option></term>
|
||||
|
||||
<listitem><para>Equivalent to <option>--console=pipe</option>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-q</option></term>
|
||||
<term><option>--quiet</option></term>
|
||||
|
@ -425,6 +425,17 @@
|
||||
is privileged (see above).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>Inaccessible=</varname></term>
|
||||
|
||||
<listitem><para>Masks the specified file or directory in the container, by over-mounting it with an empty file
|
||||
node of the same type with the most restrictive access mode. Takes a file system path as argument. This option
|
||||
may be used multiple times to mask multiple files or directories. This option is equivalent to the command line
|
||||
switch <option>--inaccessible=</option>, see
|
||||
<citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry> for details
|
||||
about the specific options supported. This setting is privileged (see above).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>Overlay=</varname></term>
|
||||
<term><varname>OverlayReadOnly=</varname></term>
|
||||
|
@ -47,6 +47,13 @@ unsigned long cap_last_cap(void) {
|
||||
if (r >= 0) {
|
||||
r = safe_atolu(content, &p);
|
||||
if (r >= 0) {
|
||||
|
||||
if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps,
|
||||
* then we are in trouble (since we, as much userspace and kernel space
|
||||
* store capability masks in uint64_t types). Let's hence protect
|
||||
* ourselves against that and always cap at 63 for now. */
|
||||
p = 63;
|
||||
|
||||
saved = p;
|
||||
valid = true;
|
||||
return p;
|
||||
@ -58,17 +65,15 @@ unsigned long cap_last_cap(void) {
|
||||
|
||||
if (prctl(PR_CAPBSET_READ, p) < 0) {
|
||||
|
||||
/* Hmm, look downwards, until we find one that
|
||||
* works */
|
||||
/* Hmm, look downwards, until we find one that works */
|
||||
for (p--; p > 0; p --)
|
||||
if (prctl(PR_CAPBSET_READ, p) >= 0)
|
||||
break;
|
||||
|
||||
} else {
|
||||
|
||||
/* Hmm, look upwards, until we find one that doesn't
|
||||
* work */
|
||||
for (;; p++)
|
||||
/* Hmm, look upwards, until we find one that doesn't work */
|
||||
for (; p < 63; p++)
|
||||
if (prctl(PR_CAPBSET_READ, p+1) < 0)
|
||||
break;
|
||||
}
|
||||
@ -363,6 +368,7 @@ bool ambient_capabilities_supported(void) {
|
||||
|
||||
int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
_cleanup_cap_free_ cap_t c = NULL;
|
||||
bool need_set_proc_again = false;
|
||||
int r;
|
||||
|
||||
if (q->ambient != (uint64_t) -1) {
|
||||
@ -393,7 +399,6 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
|
||||
if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
|
||||
return -errno;
|
||||
|
||||
if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
|
||||
return -errno;
|
||||
|
||||
@ -426,8 +431,15 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
if (q->inheritable != (uint64_t) -1) {
|
||||
cap_flag_value_t old_value, new_value;
|
||||
|
||||
if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0)
|
||||
if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
|
||||
if (errno == EINVAL) /* If the kernel knows more caps than this
|
||||
* version of libcap, then this will return
|
||||
* EINVAL. In that case, simply ignore it,
|
||||
* pretend it doesn't exist. */
|
||||
continue;
|
||||
|
||||
return -errno;
|
||||
}
|
||||
|
||||
new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
|
||||
|
||||
@ -442,8 +454,12 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
if (q->permitted != (uint64_t) -1) {
|
||||
cap_flag_value_t old_value, new_value;
|
||||
|
||||
if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0)
|
||||
if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
|
||||
if (errno == EINVAL)
|
||||
continue;
|
||||
|
||||
return -errno;
|
||||
}
|
||||
|
||||
new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
|
||||
|
||||
@ -458,8 +474,12 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
if (q->effective != (uint64_t) -1) {
|
||||
cap_flag_value_t old_value, new_value;
|
||||
|
||||
if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0)
|
||||
if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
|
||||
if (errno == EINVAL)
|
||||
continue;
|
||||
|
||||
return -errno;
|
||||
}
|
||||
|
||||
new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
|
||||
|
||||
@ -472,9 +492,39 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
}
|
||||
}
|
||||
|
||||
if (changed)
|
||||
if (cap_set_proc(c) < 0)
|
||||
if (changed) {
|
||||
_cleanup_cap_free_ cap_t modified = NULL;
|
||||
|
||||
/* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
|
||||
* longer. Let's add it to our list hence for now. */
|
||||
if (q->bounding != (uint64_t) -1) {
|
||||
cap_value_t cv = CAP_SETPCAP;
|
||||
|
||||
modified = cap_dup(c);
|
||||
if (!modified)
|
||||
return -ENOMEM;
|
||||
|
||||
if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
|
||||
return -errno;
|
||||
if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
|
||||
return -errno;
|
||||
|
||||
if (cap_compare(modified, c) == 0) {
|
||||
/* No change? then drop this nonsense again */
|
||||
cap_free(modified);
|
||||
modified = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now, let's enforce the caps for the first time. Note that this is where we acquire
|
||||
* caps in any of the sets we currently don't have. We have to do this before
|
||||
* dropping the bounding caps below, since at that point we can never acquire new
|
||||
* caps in inherited/permitted/effective anymore, but only lose them.*/
|
||||
if (cap_set_proc(modified ?: c) < 0)
|
||||
return -errno;
|
||||
|
||||
need_set_proc_again = !!modified;
|
||||
}
|
||||
}
|
||||
|
||||
if (q->bounding != (uint64_t) -1) {
|
||||
@ -483,5 +533,13 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* If needed, let's now set the caps again, this time in the final version, which differs from what
|
||||
* we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
|
||||
* bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
|
||||
* matter. */
|
||||
if (need_set_proc_again)
|
||||
if (cap_set_proc(c) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -33,10 +33,12 @@ static inline void cap_free_charpp(char **p) {
|
||||
}
|
||||
#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp)
|
||||
|
||||
/* Returns a 64-bit mask with bits 0 through cap_last_cap() (inclusive) set, i.e. one bit per capability
 * index up to the value reported by cap_last_cap(). The shift count is 63 - cap_last_cap(), so this relies
 * on cap_last_cap() never returning more than 63. */
static inline uint64_t all_capabilities(void) {
|
||||
return UINT64_MAX >> (63 - cap_last_cap());
|
||||
}
|
||||
|
||||
static inline bool cap_test_all(uint64_t caps) {
|
||||
uint64_t m;
|
||||
m = (UINT64_C(1) << (cap_last_cap() + 1)) - 1;
|
||||
return FLAGS_SET(caps, m);
|
||||
return FLAGS_SET(caps, all_capabilities());
|
||||
}
|
||||
|
||||
bool ambient_capabilities_supported(void);
|
||||
|
@ -10,6 +10,8 @@ libnspawn_core_sources = files('''
|
||||
nspawn-mount.h
|
||||
nspawn-network.c
|
||||
nspawn-network.h
|
||||
nspawn-oci.c
|
||||
nspawn-oci.h
|
||||
nspawn-patch-uid.c
|
||||
nspawn-patch-uid.h
|
||||
nspawn-register.c
|
||||
|
@ -263,7 +263,7 @@ static int mount_legacy_cgroup_hierarchy(
|
||||
if (r > 0)
|
||||
return 0;
|
||||
|
||||
mkdir_p(to, 0755);
|
||||
(void) mkdir_p(to, 0755);
|
||||
|
||||
/* The superblock mount options of the mount point need to be
|
||||
* identical to the hosts', and hence writable... */
|
||||
|
@ -62,6 +62,7 @@ Files.Volatile, config_parse_volatile_mode, 0, of
|
||||
Files.Bind, config_parse_bind, 0, 0
|
||||
Files.BindReadOnly, config_parse_bind, 1, 0
|
||||
Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
|
||||
Files.Inaccessible, config_parse_inaccessible, 0, 0
|
||||
Files.Overlay, config_parse_overlay, 0, 0
|
||||
Files.OverlayReadOnly, config_parse_overlay, 1, 0
|
||||
Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
|
||||
|
@ -65,6 +65,7 @@ void custom_mount_free_all(CustomMount *l, size_t n) {
|
||||
}
|
||||
|
||||
strv_free(m->lower);
|
||||
free(m->type_argument);
|
||||
}
|
||||
|
||||
free(l);
|
||||
@ -116,6 +117,13 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
|
||||
for (i = 0; i < n; i++) {
|
||||
CustomMount *m = l + i;
|
||||
|
||||
/* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
|
||||
* already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
|
||||
* particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
|
||||
* the inner child, not the outer one. Determine this here. */
|
||||
m->in_userns = path_startswith(m->destination, "/proc");
|
||||
|
||||
if (m->type == CUSTOM_MOUNT_BIND) {
|
||||
if (m->source) {
|
||||
char *s;
|
||||
|
||||
@ -143,6 +151,7 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
|
||||
if (mkdir(m->source, 0755) < 0)
|
||||
return log_error_errno(errno, "Failed to create %s: %m", m->source);
|
||||
}
|
||||
}
|
||||
|
||||
if (m->type == CUSTOM_MOUNT_OVERLAY) {
|
||||
char **j;
|
||||
@ -223,6 +232,7 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only)
|
||||
m->destination = TAKE_PTR(destination);
|
||||
m->read_only = read_only;
|
||||
m->options = TAKE_PTR(opts);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -327,6 +337,29 @@ int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_onl
|
||||
return 0;
|
||||
}
|
||||
|
||||
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
CustomMount *m;
|
||||
|
||||
assert(l);
|
||||
assert(n);
|
||||
assert(s);
|
||||
|
||||
if (!path_is_absolute(s))
|
||||
return -EINVAL;
|
||||
|
||||
path = strdup(s);
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
m = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
|
||||
if (!m)
|
||||
return -ENOMEM;
|
||||
|
||||
m->destination = TAKE_PTR(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int tmpfs_patch_options(
|
||||
const char *options,
|
||||
uid_t uid_shift,
|
||||
@ -494,9 +527,9 @@ int mount_all(const char *dest,
|
||||
uid_t uid_shift,
|
||||
const char *selinux_apifs_context) {
|
||||
|
||||
#define PROC_INACCESSIBLE(path) \
|
||||
{ NULL, (path), NULL, NULL, MS_BIND, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_INACCESSIBLE_REG }, /* Bind mount first ... */ \
|
||||
#define PROC_INACCESSIBLE_REG(path) \
|
||||
{ "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
|
||||
{ NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
|
||||
|
||||
@ -531,11 +564,11 @@ int mount_all(const char *dest,
|
||||
|
||||
/* Make these files inaccessible to container payloads: they potentially leak information about kernel
|
||||
* internals or the host's execution environment to the container */
|
||||
PROC_INACCESSIBLE("/proc/kallsyms"),
|
||||
PROC_INACCESSIBLE("/proc/kcore"),
|
||||
PROC_INACCESSIBLE("/proc/keys"),
|
||||
PROC_INACCESSIBLE("/proc/sysrq-trigger"),
|
||||
PROC_INACCESSIBLE("/proc/timer_list"),
|
||||
PROC_INACCESSIBLE_REG("/proc/kallsyms"),
|
||||
PROC_INACCESSIBLE_REG("/proc/kcore"),
|
||||
PROC_INACCESSIBLE_REG("/proc/keys"),
|
||||
PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
|
||||
PROC_INACCESSIBLE_REG("/proc/timer_list"),
|
||||
|
||||
/* Make these directories read-only to container payloads: they show hardware information, and in some
|
||||
* cases contain tunables the container really shouldn't have access to. */
|
||||
@ -573,7 +606,6 @@ int mount_all(const char *dest,
|
||||
#endif
|
||||
};
|
||||
|
||||
_cleanup_(unlink_and_freep) char *inaccessible = NULL;
|
||||
bool use_userns = (mount_settings & MOUNT_USE_USERNS);
|
||||
bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
|
||||
bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
|
||||
@ -584,7 +616,7 @@ int mount_all(const char *dest,
|
||||
|
||||
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
|
||||
_cleanup_free_ char *where = NULL, *options = NULL;
|
||||
const char *o, *what;
|
||||
const char *o;
|
||||
bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
|
||||
|
||||
if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
|
||||
@ -603,33 +635,14 @@ int mount_all(const char *dest,
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
|
||||
|
||||
if (mount_table[k].mount_settings & MOUNT_INACCESSIBLE_REG) {
|
||||
|
||||
if (!inaccessible) {
|
||||
_cleanup_free_ char *np = NULL;
|
||||
|
||||
r = tempfn_random_child(NULL, "inaccessible", &np);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to generate inaccessible file node path: %m");
|
||||
|
||||
r = touch_file(np, false, USEC_INFINITY, UID_INVALID, GID_INVALID, 0000);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create inaccessible file node '%s': %m", np);
|
||||
|
||||
inaccessible = TAKE_PTR(np);
|
||||
}
|
||||
|
||||
what = inaccessible;
|
||||
} else
|
||||
what = mount_table[k].what;
|
||||
|
||||
/* Skip this entry if it is not a remount. */
|
||||
if (mount_table[k].what) {
|
||||
r = path_is_mount_point(where, NULL, 0);
|
||||
if (r < 0 && r != -ENOENT)
|
||||
return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
|
||||
|
||||
/* Skip this entry if it is not a remount. */
|
||||
if (what && r > 0)
|
||||
if (r > 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
|
||||
if (r < 0 && r != -EEXIST) {
|
||||
@ -654,7 +667,7 @@ int mount_all(const char *dest,
|
||||
}
|
||||
|
||||
r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
|
||||
what,
|
||||
mount_table[k].what,
|
||||
where,
|
||||
mount_table[k].type,
|
||||
mount_table[k].flags,
|
||||
@ -667,7 +680,6 @@ int mount_all(const char *dest,
|
||||
}
|
||||
|
||||
static int mount_bind(const char *dest, CustomMount *m) {
|
||||
|
||||
_cleanup_free_ char *where = NULL;
|
||||
struct stat source_st, dest_st;
|
||||
int r;
|
||||
@ -711,7 +723,6 @@ static int mount_bind(const char *dest, CustomMount *m) {
|
||||
r = touch(where);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create mount point %s: %m", where);
|
||||
|
||||
}
|
||||
|
||||
r = mount_verbose(LOG_ERR, m->source, where, NULL, MS_BIND | MS_REC, m->options);
|
||||
@ -773,7 +784,6 @@ static char *joined_and_escaped_lower_dirs(char **lower) {
|
||||
}
|
||||
|
||||
static int mount_overlay(const char *dest, CustomMount *m) {
|
||||
|
||||
_cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
|
||||
const char *options;
|
||||
int r;
|
||||
@ -815,11 +825,59 @@ static int mount_overlay(const char *dest, CustomMount *m) {
|
||||
return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
|
||||
}
|
||||
|
||||
static int mount_inaccessible(const char *dest, CustomMount *m) {
|
||||
_cleanup_free_ char *where = NULL;
|
||||
const char *source;
|
||||
struct stat st;
|
||||
int r;
|
||||
|
||||
assert(dest);
|
||||
assert(m);
|
||||
|
||||
r = chase_symlinks_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st);
|
||||
if (r < 0) {
|
||||
log_full_errno(m->graceful ? LOG_DEBUG : LOG_ERR, r, "Failed to resolve %s/%s: %m", dest, m->destination);
|
||||
return m->graceful ? 0 : r;
|
||||
}
|
||||
|
||||
assert_se(source = mode_to_inaccessible_node(st.st_mode));
|
||||
|
||||
r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
|
||||
if (r < 0)
|
||||
return m->graceful ? 0 : r;
|
||||
|
||||
r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
|
||||
if (r < 0)
|
||||
return m->graceful ? 0 : r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mount_arbitrary(const char *dest, CustomMount *m) {
|
||||
_cleanup_free_ char *where = NULL;
|
||||
int r;
|
||||
|
||||
assert(dest);
|
||||
assert(m);
|
||||
|
||||
r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
|
||||
if (r == 0) { /* Doesn't exist yet? */
|
||||
r = mkdir_p_label(where, 0755);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Creating mount point for mount %s failed: %m", where);
|
||||
}
|
||||
|
||||
return mount_verbose(LOG_ERR, m->source, where, m->type_argument, 0, m->options);
|
||||
}
|
||||
|
||||
int mount_custom(
|
||||
const char *dest,
|
||||
CustomMount *mounts, size_t n,
|
||||
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||
const char *selinux_apifs_context) {
|
||||
const char *selinux_apifs_context,
|
||||
bool in_userns) {
|
||||
|
||||
size_t i;
|
||||
int r;
|
||||
@ -829,6 +887,9 @@ int mount_custom(
|
||||
for (i = 0; i < n; i++) {
|
||||
CustomMount *m = mounts + i;
|
||||
|
||||
if (m->in_userns != in_userns)
|
||||
continue;
|
||||
|
||||
switch (m->type) {
|
||||
|
||||
case CUSTOM_MOUNT_BIND:
|
||||
@ -843,6 +904,14 @@ int mount_custom(
|
||||
r = mount_overlay(dest, m);
|
||||
break;
|
||||
|
||||
case CUSTOM_MOUNT_INACCESSIBLE:
|
||||
r = mount_inaccessible(dest, m);
|
||||
break;
|
||||
|
||||
case CUSTOM_MOUNT_ARBITRARY:
|
||||
r = mount_arbitrary(dest, m);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert_not_reached("Unknown custom mount type");
|
||||
}
|
||||
|
@ -13,14 +13,15 @@ typedef enum MountSettingsMask {
|
||||
MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sys will be mounted read-only, otherwise read-write. */
|
||||
MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
|
||||
Works only if MOUNT_APPLY_APIVFS_RO is also set. */
|
||||
MOUNT_INACCESSIBLE_REG = 1 << 5, /* if set, create an inaccessible regular file first and use as bind mount source */
|
||||
MOUNT_APPLY_TMPFS_TMP = 1 << 6, /* if set, /tmp will be mounted as tmpfs */
|
||||
MOUNT_APPLY_TMPFS_TMP = 1 << 5, /* if set, /tmp will be mounted as tmpfs */
|
||||
} MountSettingsMask;
|
||||
|
||||
typedef enum CustomMountType {
|
||||
CUSTOM_MOUNT_BIND,
|
||||
CUSTOM_MOUNT_TMPFS,
|
||||
CUSTOM_MOUNT_OVERLAY,
|
||||
CUSTOM_MOUNT_INACCESSIBLE,
|
||||
CUSTOM_MOUNT_ARBITRARY,
|
||||
_CUSTOM_MOUNT_TYPE_MAX,
|
||||
_CUSTOM_MOUNT_TYPE_INVALID = -1
|
||||
} CustomMountType;
|
||||
@ -34,6 +35,9 @@ typedef struct CustomMount {
|
||||
char *work_dir;
|
||||
char **lower;
|
||||
char *rm_rf_tmpdir;
|
||||
char *type_argument; /* only for CUSTOM_MOUNT_ARBITRARY */
|
||||
bool graceful;
|
||||
bool in_userns;
|
||||
} CustomMount;
|
||||
|
||||
CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t);
|
||||
@ -43,11 +47,12 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n);
|
||||
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
|
||||
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s);
|
||||
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
|
||||
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
|
||||
|
||||
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
|
||||
int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
|
||||
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool in_userns);
|
||||
|
||||
int setup_volatile_mode(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||
|
||||
|
2352
src/nspawn/nspawn-oci.c
Normal file
2352
src/nspawn/nspawn-oci.c
Normal file
File diff suppressed because it is too large
Load Diff
6
src/nspawn/nspawn-oci.h
Normal file
6
src/nspawn/nspawn-oci.h
Normal file
@ -0,0 +1,6 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
#pragma once
|
||||
|
||||
#include "nspawn-settings.h"
|
||||
|
||||
int oci_load(FILE *f, const char *path, Settings **ret);
|
@ -112,6 +112,7 @@ int register_machine(
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties,
|
||||
sd_bus_message *properties_message,
|
||||
bool keep_unit,
|
||||
const char *service) {
|
||||
|
||||
@ -185,6 +186,12 @@ int register_machine(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (properties_message) {
|
||||
r = sd_bus_message_copy(m, properties_message, true);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = bus_append_unit_property_assignment_many(m, UNIT_SERVICE, properties);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -235,7 +242,8 @@ int allocate_scope(
|
||||
CustomMount *mounts,
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties) {
|
||||
char **properties,
|
||||
sd_bus_message *properties_message) {
|
||||
|
||||
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
@ -289,6 +297,12 @@ int allocate_scope(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (properties_message) {
|
||||
r = sd_bus_message_copy(m, properties_message, true);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = append_machine_properties(
|
||||
m,
|
||||
mounts,
|
||||
|
@ -7,8 +7,8 @@
|
||||
|
||||
#include "nspawn-mount.h"
|
||||
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service);
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool keep_unit, const char *service);
|
||||
int terminate_machine(sd_bus *bus, const char *machine_name);
|
||||
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties);
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message);
|
||||
int terminate_scope(sd_bus *bus, const char *machine_name);
|
||||
|
@ -17,6 +17,50 @@
|
||||
#include "user-util.h"
|
||||
#include "util.h"
|
||||
|
||||
Settings *settings_new(void) {
|
||||
Settings *s;
|
||||
|
||||
s = new(Settings, 1);
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
*s = (Settings) {
|
||||
.start_mode = _START_MODE_INVALID,
|
||||
.personality = PERSONALITY_INVALID,
|
||||
|
||||
.resolv_conf = _RESOLV_CONF_MODE_INVALID,
|
||||
.link_journal = _LINK_JOURNAL_INVALID,
|
||||
.timezone = _TIMEZONE_MODE_INVALID,
|
||||
|
||||
.userns_mode = _USER_NAMESPACE_MODE_INVALID,
|
||||
.userns_chown = -1,
|
||||
.uid_shift = UID_INVALID,
|
||||
.uid_range = UID_INVALID,
|
||||
|
||||
.no_new_privileges = -1,
|
||||
|
||||
.read_only = -1,
|
||||
.volatile_mode = _VOLATILE_MODE_INVALID,
|
||||
|
||||
.private_network = -1,
|
||||
.network_veth = -1,
|
||||
|
||||
.full_capabilities = CAPABILITY_QUINTET_NULL,
|
||||
|
||||
.uid = UID_INVALID,
|
||||
.gid = GID_INVALID,
|
||||
|
||||
.console_mode = _CONSOLE_MODE_INVALID,
|
||||
.console_width = (unsigned) -1,
|
||||
.console_height = (unsigned) -1,
|
||||
|
||||
.clone_ns_flags = (unsigned long) -1,
|
||||
.use_cgns = -1,
|
||||
};
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
_cleanup_(settings_freep) Settings *s = NULL;
|
||||
int r;
|
||||
@ -24,27 +68,10 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
assert(path);
|
||||
assert(ret);
|
||||
|
||||
s = new0(Settings, 1);
|
||||
s = settings_new();
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
s->start_mode = _START_MODE_INVALID;
|
||||
s->personality = PERSONALITY_INVALID;
|
||||
s->userns_mode = _USER_NAMESPACE_MODE_INVALID;
|
||||
s->resolv_conf = _RESOLV_CONF_MODE_INVALID;
|
||||
s->link_journal = _LINK_JOURNAL_INVALID;
|
||||
s->timezone = _TIMEZONE_MODE_INVALID;
|
||||
s->uid_shift = UID_INVALID;
|
||||
s->uid_range = UID_INVALID;
|
||||
s->no_new_privileges = -1;
|
||||
|
||||
s->read_only = -1;
|
||||
s->volatile_mode = _VOLATILE_MODE_INVALID;
|
||||
s->userns_chown = -1;
|
||||
|
||||
s->private_network = -1;
|
||||
s->network_veth = -1;
|
||||
|
||||
r = config_parse(NULL, path, f,
|
||||
"Exec\0"
|
||||
"Network\0"
|
||||
@ -66,12 +93,33 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
s->userns_mode = USER_NAMESPACE_NO;
|
||||
|
||||
*ret = TAKE_PTR(s);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Settings* settings_free(Settings *s) {
|
||||
/* Frees an array of n OciHook entries: for each entry the path string and the args and env string vectors
 * are released, then the array itself is freed. h may be NULL only if n is 0 (asserted). */
static void free_oci_hooks(OciHook *h, size_t n) {
|
||||
size_t i;
|
||||
|
||||
assert(h || n == 0);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
free(h[i].path);
|
||||
strv_free(h[i].args);
|
||||
strv_free(h[i].env);
|
||||
}
|
||||
|
||||
free(h);
|
||||
}
|
||||
|
||||
/* Frees an array of n DeviceNode entries: the path string of every entry is released first, and then the
 * array itself is freed. Since the array pointer is freed here, callers must not free() it again. */
void device_node_free_many(DeviceNode *node, size_t n) {
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
free(node[i].path);
|
||||
|
||||
free(node);
|
||||
}
|
||||
|
||||
Settings* settings_free(Settings *s) {
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
@ -96,6 +144,28 @@ Settings* settings_free(Settings *s) {
|
||||
expose_port_free_all(s->expose_ports);
|
||||
|
||||
custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
|
||||
|
||||
free(s->bundle);
|
||||
free(s->root);
|
||||
|
||||
free_oci_hooks(s->oci_hooks_prestart, s->n_oci_hooks_prestart);
|
||||
free_oci_hooks(s->oci_hooks_poststart, s->n_oci_hooks_poststart);
|
||||
free_oci_hooks(s->oci_hooks_poststop, s->n_oci_hooks_poststop);
|
||||
|
||||
free(s->slice);
|
||||
sd_bus_message_unref(s->properties);
|
||||
|
||||
free(s->supplementary_gids);
|
||||
/* device_node_free_many() releases every node's path and then the array itself, hence no separate free() of s->extra_nodes here (that would be a double free). */
|
||||
device_node_free_many(s->extra_nodes, s->n_extra_nodes);
|
||||
free(s->network_namespace_path);
|
||||
|
||||
strv_free(s->sysctl);
|
||||
|
||||
#if HAVE_SECCOMP
|
||||
seccomp_release(s->seccomp);
|
||||
#endif
|
||||
|
||||
return mfree(s);
|
||||
}
|
||||
|
||||
@ -122,6 +192,26 @@ bool settings_network_veth(Settings *s) {
|
||||
s->network_zone;
|
||||
}
|
||||
|
||||
/* Ensures s->properties is populated with a bus message object. If one is already set, nothing is done.
 * Otherwise a bus connection is obtained via sd_bus_default_system() and a new method call message is
 * created on it. Returns 0 on success (or when there was nothing to do), or the negative error returned
 * by the failing sd-bus call. */
int settings_allocate_properties(Settings *s) {
        _cleanup_(sd_bus_unrefp) sd_bus *system_bus = NULL;
        int k;

        assert(s);

        if (!s->properties) {
                k = sd_bus_default_system(&system_bus);
                if (k < 0)
                        return k;

                k = sd_bus_message_new(system_bus, &s->properties, SD_BUS_MESSAGE_METHOD_CALL);
                if (k < 0)
                        return k;
        }

        return 0;
}
|
||||
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode");
|
||||
|
||||
int config_parse_expose_port(
|
||||
@ -315,6 +405,34 @@ int config_parse_tmpfs(
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_inaccessible(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
Settings *settings = data;
|
||||
int r;
|
||||
|
||||
assert(filename);
|
||||
assert(lvalue);
|
||||
assert(rvalue);
|
||||
|
||||
r = inaccessible_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid inaccessible file system specification %s: %m", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_overlay(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
|
@ -4,8 +4,14 @@
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if HAVE_SECCOMP
|
||||
#include <seccomp.h>
|
||||
#endif
|
||||
|
||||
#include "sd-bus.h"
|
||||
#include "sd-id128.h"
|
||||
|
||||
#include "capability-util.h"
|
||||
#include "conf-parser.h"
|
||||
#include "macro.h"
|
||||
#include "missing_resource.h"
|
||||
@ -60,6 +66,15 @@ typedef enum TimezoneMode {
|
||||
_TIMEZONE_MODE_INVALID = -1
|
||||
} TimezoneMode;
|
||||
|
||||
/* Console modes. The regular enumerators are numbered consecutively from 0, so that _CONSOLE_MODE_MAX
 * equals the number of modes; _CONSOLE_MODE_INVALID is the out-of-range marker (-1). */
typedef enum ConsoleMode {
        _CONSOLE_MODE_INVALID = -1,
        CONSOLE_INTERACTIVE = 0,
        CONSOLE_READ_ONLY,
        CONSOLE_PASSIVE,
        CONSOLE_PIPE,
        _CONSOLE_MODE_MAX,
} ConsoleMode;
|
||||
|
||||
typedef enum SettingsMask {
|
||||
SETTING_START_MODE = UINT64_C(1) << 0,
|
||||
SETTING_ENVIRONMENT = UINT64_C(1) << 1,
|
||||
@ -86,9 +101,14 @@ typedef enum SettingsMask {
|
||||
SETTING_LINK_JOURNAL = UINT64_C(1) << 22,
|
||||
SETTING_TIMEZONE = UINT64_C(1) << 23,
|
||||
SETTING_EPHEMERAL = UINT64_C(1) << 24,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 25, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (25 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (25 + _RLIMIT_MAX)) -1,
|
||||
SETTING_SLICE = UINT64_C(1) << 25,
|
||||
SETTING_DIRECTORY = UINT64_C(1) << 26,
|
||||
SETTING_USE_CGNS = UINT64_C(1) << 27,
|
||||
SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28,
|
||||
SETTING_CONSOLE_MODE = UINT64_C(1) << 29,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 30, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (30 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (30 + _RLIMIT_MAX)) -1,
|
||||
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
|
||||
} SettingsMask;
|
||||
|
||||
@ -101,6 +121,22 @@ assert_cc(sizeof(SettingsMask) == 8);
|
||||
assert_cc(sizeof(SETTING_RLIMIT_FIRST) == 8);
|
||||
assert_cc(sizeof(SETTING_RLIMIT_LAST) == 8);
|
||||
|
||||
/* Description of one device node. Arrays of these are released with device_node_free_many(), which
 * frees each entry's path and the array itself. */
typedef struct DeviceNode {
        char *path;             /* heap-allocated; freed by device_node_free_many() */
        unsigned major, minor;  /* presumably the device major/minor numbers */
        mode_t mode;
        uid_t uid;
        gid_t gid;
} DeviceNode;
|
||||
|
||||
typedef struct OciHook {
|
||||
char *path;
|
||||
char **args;
|
||||
char **env;
|
||||
usec_t timeout;
|
||||
} OciHook;
|
||||
|
||||
typedef struct Settings {
|
||||
/* [Run] */
|
||||
StartMode start_mode;
|
||||
@ -150,13 +186,39 @@ typedef struct Settings {
|
||||
char **network_ipvlan;
|
||||
char **network_veth_extra;
|
||||
ExposePort *expose_ports;
|
||||
|
||||
/* Additional fields, that are specific to OCI runtime case */
|
||||
char *bundle;
|
||||
char *root;
|
||||
OciHook *oci_hooks_prestart, *oci_hooks_poststart, *oci_hooks_poststop;
|
||||
size_t n_oci_hooks_prestart, n_oci_hooks_poststart, n_oci_hooks_poststop;
|
||||
char *slice;
|
||||
sd_bus_message *properties;
|
||||
CapabilityQuintet full_capabilities;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
gid_t *supplementary_gids;
|
||||
size_t n_supplementary_gids;
|
||||
unsigned console_width, console_height;
|
||||
ConsoleMode console_mode;
|
||||
DeviceNode *extra_nodes;
|
||||
size_t n_extra_nodes;
|
||||
unsigned long clone_ns_flags;
|
||||
char *network_namespace_path;
|
||||
int use_cgns;
|
||||
char **sysctl;
|
||||
#if HAVE_SECCOMP
|
||||
scmp_filter_ctx seccomp;
|
||||
#endif
|
||||
} Settings;
|
||||
|
||||
Settings *settings_new(void);
|
||||
int settings_load(FILE *f, const char *path, Settings **ret);
|
||||
Settings* settings_free(Settings *s);
|
||||
|
||||
bool settings_network_veth(Settings *s);
|
||||
bool settings_private_network(Settings *s);
|
||||
int settings_allocate_properties(Settings *s);
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free);
|
||||
|
||||
@ -170,6 +232,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_pivot_root);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_bind);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_overlay);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_inaccessible);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_veth_extra);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_network_zone);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_boot);
|
||||
@ -190,3 +253,5 @@ const char *timezone_mode_to_string(TimezoneMode a) _const_;
|
||||
TimezoneMode timezone_mode_from_string(const char *s) _pure_;
|
||||
|
||||
int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try);
|
||||
|
||||
void device_node_free_many(DeviceNode *node, size_t n);
|
||||
|
@ -59,14 +59,41 @@ static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
|
||||
return pipe_fds[0];
|
||||
}
|
||||
|
||||
/* Switches the calling process to the specified numeric UID/GID and supplementary group list.
 *
 * An invalid uid or gid (as judged by uid_is_valid()/gid_is_valid()) is replaced by 0. Ownership of
 * stdin/stdout/stderr is changed to the new UID/GID on a best-effort basis (failures are ignored).
 * Then the supplementary groups, the GID and finally the UID are set, in that order.
 *
 * Returns 0 on success, otherwise the result of log_error_errno() for the call that failed. */
int change_uid_gid_raw(
                uid_t uid,
                gid_t gid,
                const gid_t *supplementary_gids,
                size_t n_supplementary_gids) {

        static const int stdio_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };

        uid = uid_is_valid(uid) ? uid : 0;
        gid = gid_is_valid(gid) ? gid : 0;

        /* Best effort only: the result of fchown() is deliberately discarded. */
        for (size_t k = 0; k < sizeof(stdio_fds) / sizeof(stdio_fds[0]); k++)
                (void) fchown(stdio_fds[k], uid, gid);

        /* Groups first, UID last — presumably because the group calls need privileges that are gone
         * once the UID has been changed. */
        if (setgroups(n_supplementary_gids, supplementary_gids) < 0)
                return log_error_errno(errno, "Failed to set auxiliary groups: %m");

        if (setresgid(gid, gid, gid) < 0)
                return log_error_errno(errno, "setresgid() failed: %m");

        if (setresuid(uid, uid, uid) < 0)
                return log_error_errno(errno, "setresuid() failed: %m");

        return 0;
}
|
||||
|
||||
int change_uid_gid(const char *user, char **_home) {
|
||||
char *x, *u, *g, *h;
|
||||
const char *word, *state;
|
||||
_cleanup_free_ uid_t *uids = NULL;
|
||||
_cleanup_free_ gid_t *gids = NULL;
|
||||
_cleanup_free_ char *home = NULL, *line = NULL;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
unsigned n_uids = 0;
|
||||
unsigned n_gids = 0;
|
||||
size_t sz = 0, l;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
@ -189,10 +216,10 @@ int change_uid_gid(const char *user, char **_home) {
|
||||
memcpy(c, word, l);
|
||||
c[l] = 0;
|
||||
|
||||
if (!GREEDY_REALLOC(uids, sz, n_uids+1))
|
||||
if (!GREEDY_REALLOC(gids, sz, n_gids+1))
|
||||
return log_oom();
|
||||
|
||||
r = parse_uid(c, &uids[n_uids++]);
|
||||
r = parse_gid(c, &gids[n_gids++]);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse group data from getent: %m");
|
||||
}
|
||||
@ -205,18 +232,9 @@ int change_uid_gid(const char *user, char **_home) {
|
||||
if (r < 0 && !IN_SET(r, -EEXIST, -ENOTDIR))
|
||||
return log_error_errno(r, "Failed to make home directory: %m");
|
||||
|
||||
(void) fchown(STDIN_FILENO, uid, gid);
|
||||
(void) fchown(STDOUT_FILENO, uid, gid);
|
||||
(void) fchown(STDERR_FILENO, uid, gid);
|
||||
|
||||
if (setgroups(n_uids, uids) < 0)
|
||||
return log_error_errno(errno, "Failed to set auxiliary groups: %m");
|
||||
|
||||
if (setresgid(gid, gid, gid) < 0)
|
||||
return log_error_errno(errno, "setresgid() failed: %m");
|
||||
|
||||
if (setresuid(uid, uid, uid) < 0)
|
||||
return log_error_errno(errno, "setresuid() failed: %m");
|
||||
r = change_uid_gid_raw(uid, gid, gids, n_gids);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (_home)
|
||||
*_home = TAKE_PTR(home);
|
||||
|
@ -1,4 +1,5 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
#pragma once
|
||||
|
||||
int change_uid_gid(const char *user, char **ret);
|
||||
int change_uid_gid_raw(uid_t uid, gid_t gid, const gid_t *supplementary_gids, size_t n_supplementary_gids);
|
||||
int change_uid_gid(const char *user, char **ret_home);
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user