core: add id-mapped mount support for Exec directories

This commit is contained in:
Andres Beltran 2024-08-05 23:43:29 +00:00
parent edae62120f
commit eae5127246
9 changed files with 189 additions and 13 deletions

View File

@ -1476,6 +1476,13 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
below the locations defined in the following table. Also, the corresponding environment variable will
be defined with the full paths of the directories. If multiple directories are set, then in the
environment variable the paths are concatenated with colon (<literal>:</literal>).</para>
<para>If <varname>DynamicUser=</varname> is used, and if the kernel version supports
<ulink url="https://lwn.net/Articles/896255/">id-mapped mounts</ulink>, the specified directories will
be owned by "nobody" in the host namespace and will be mapped to (and will be owned by) the service's
UID/GID in its own namespace. For backward compatibility, existing directories created without id-mapped
mounts will be kept untouched.</para>
<table>
<title>Automatic directory creation and environment variables</title>
<tgroup cols='4'>

View File

@ -2554,7 +2554,8 @@ static int setup_exec_directory(
/* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
* specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
* current UID/GID ownership.) */
r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
const char *target_dir = pp ?: p;
r = chmod_and_chown(target_dir, context->directories[type].mode, UID_INVALID, GID_INVALID);
if (r < 0)
goto fail;
@ -2563,12 +2564,51 @@ static int setup_exec_directory(
if (params->runtime_scope != RUNTIME_SCOPE_SYSTEM)
continue;
/* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
int idmapping_supported = is_idmapping_supported(target_dir);
if (idmapping_supported < 0) {
r = log_debug_errno(idmapping_supported, "Unable to determine if ID mapping is supported on mount '%s': %m", target_dir);
goto fail;
}
log_debug("ID-mapping is%ssupported for exec directory %s", idmapping_supported ? " " : " not ", target_dir);
/* Change the ownership of the whole tree, if necessary. When dynamic users are used we
* drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
* assignments to exist. */
r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
if (r < 0)
goto fail;
uid_t chown_uid = uid;
gid_t chown_gid = gid;
bool do_chown = false;
if (uid == 0 || gid == 0 || !idmapping_supported) {
do_chown = true;
i->idmapped = false;
} else {
/* Use 'nobody' uid/gid for exec directories if ID-mapping is supported. For backward compatibility,
* continue doing chmod/chown if the directory was chmod/chowned before (if uid/gid is not 'nobody') */
struct stat st;
r = RET_NERRNO(stat(target_dir, &st));
if (r < 0)
goto fail;
if (st.st_uid == UID_NOBODY && st.st_gid == GID_NOBODY) {
do_chown = false;
i->idmapped = true;
} else if (exec_directory_is_private(context, type) && st.st_uid == 0 && st.st_gid == 0) {
chown_uid = UID_NOBODY;
chown_gid = GID_NOBODY;
do_chown = true;
i->idmapped = true;
} else {
do_chown = true;
i->idmapped = false;
}
}
if (do_chown) {
r = path_chown_recursive(target_dir, chown_uid, chown_gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW);
if (r < 0)
goto fail;
}
}
/* If we are not going to run in a namespace, set up the symlinks - otherwise
@ -2620,6 +2660,8 @@ static int setup_smack(
static int compile_bind_mounts(
const ExecContext *context,
const ExecParameters *params,
uid_t exec_directory_uid, /* only used for id-mapped mounts Exec directories */
gid_t exec_directory_gid, /* only used for id-mapped mounts Exec directories */
BindMount **ret_bind_mounts,
size_t *ret_n_bind_mounts,
char ***ret_empty_directories) {
@ -2721,6 +2763,9 @@ static int compile_bind_mounts(
.nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
.recursive = true,
.read_only = FLAGS_SET(i->flags, EXEC_DIRECTORY_READ_ONLY),
.idmapped = i->idmapped,
.uid = exec_directory_uid,
.gid = exec_directory_gid,
};
}
}
@ -3057,7 +3102,9 @@ static int apply_mount_namespace(
ExecRuntime *runtime,
const char *memory_pressure_path,
bool needs_sandboxing,
char **reterr_path) {
char **reterr_path,
uid_t exec_directory_uid,
gid_t exec_directory_gid) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
@ -3095,7 +3142,7 @@ static int apply_mount_namespace(
return r;
}
r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
r = compile_bind_mounts(context, params, exec_directory_uid, exec_directory_gid, &bind_mounts, &n_bind_mounts, &empty_directories);
if (r < 0)
return r;
@ -4886,7 +4933,9 @@ int exec_invoke(
runtime,
memory_pressure_path,
needs_sandboxing,
&error_path);
&error_path,
uid,
gid);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",

View File

@ -163,6 +163,7 @@ typedef struct ExecDirectoryItem {
char *path;
char **symlinks;
ExecDirectoryFlags flags;
bool idmapped;
} ExecDirectoryItem;
typedef struct ExecDirectory {

View File

@ -113,6 +113,9 @@ typedef struct MountEntry {
LIST_HEAD(MountOptions, image_options_const);
char **overlay_layers;
VeritySettings verity;
bool idmapped;
uid_t idmap_uid;
gid_t idmap_gid;
} MountEntry;
typedef struct MountList {
@ -467,6 +470,9 @@ static int append_bind_mounts(MountList *ml, const BindMount *binds, size_t n) {
.flags = b->nodev ? MS_NODEV : 0,
.source_const = b->source,
.ignore = b->ignore_enoent,
.idmapped = b->idmapped,
.idmap_uid = b->uid,
.idmap_gid = b->gid,
};
}
@ -1892,6 +1898,45 @@ static int apply_one_mount(
}
log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
/* Take care of id-mapped mounts */
if (m->idmapped && uid_is_valid(m->idmap_uid) && gid_is_valid(m->idmap_gid)) {
_cleanup_close_ int userns_fd = -EBADF;
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
log_debug("Setting an id-mapped mount on %s", mount_entry_path(m));
/* Do mapping from nobody (in setup_exec_directory()) -> this uid */
if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, (uid_t)m->idmap_uid, (uid_t)1u) < 0)
return log_oom();
/* Consider StateDirectory=xxx aaa xxx:aaa/222
* To allow for later symlink creation (by root) in create_symlinks_from_tuples(), map root as well. */
if (m->idmap_uid != (uid_t)0) {
if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", (uid_t)0, (uid_t)0, (uid_t)1u) < 0)
return log_oom();
}
if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, (gid_t)m->idmap_gid, (gid_t)1u) < 0)
return log_oom();
if (m->idmap_gid != (gid_t)0) {
if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t)0, (gid_t)0, (gid_t)1u) < 0)
return log_oom();
}
userns_fd = userns_acquire(uid_map, gid_map);
if (userns_fd < 0)
return log_error_errno(userns_fd, "Failed to allocate user namespace: %m");
/* Drop SUID, add NOEXEC for the mount to avoid root exploits */
r = remount_idmap_fd(STRV_MAKE(mount_entry_path(m)), userns_fd, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV);
if (r < 0)
return log_error_errno(r, "Failed to create an id-mapped mount: %m");
log_debug("ID-mapped mount created successfully for %s from " UID_FMT " to " UID_FMT "", mount_entry_path(m), UID_NOBODY, m->idmap_uid);
}
return 1;
}

View File

@ -87,6 +87,9 @@ struct BindMount {
bool noexec;
bool recursive;
bool ignore_enoent;
bool idmapped;
uid_t uid;
gid_t gid;
};
struct TemporaryFileSystem {

View File

@ -2089,7 +2089,7 @@ static int mount_partition(
(void) fs_grow(node, -EBADF, p);
if (userns_fd >= 0) {
r = remount_idmap_fd(STRV_MAKE(p), userns_fd);
r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0);
if (r < 0)
return r;
}

View File

@ -1386,7 +1386,8 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest
int remount_idmap_fd(
char **paths,
int userns_fd) {
int userns_fd,
uint64_t extra_mount_attr_set) {
int r;
@ -1423,7 +1424,7 @@ int remount_idmap_fd(
/* Set the user namespace mapping attribute on the cloned mount point */
if (mount_setattr(mntfd, "", AT_EMPTY_PATH,
&(struct mount_attr) {
.attr_set = MOUNT_ATTR_IDMAP,
.attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set,
.userns_fd = userns_fd,
}, sizeof(struct mount_attr)) < 0)
return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]);
@ -1460,7 +1461,7 @@ int remount_idmap(
if (userns_fd < 0)
return userns_fd;
return remount_idmap_fd(p, userns_fd);
return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0);
}
static void sub_mount_clear(SubMount *s) {

View File

@ -163,7 +163,7 @@ typedef enum RemountIdmapping {
} RemountIdmapping;
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int remount_idmap_fd(char **p, int userns_fd);
int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
int bind_mount_submounts(

View File

@ -162,6 +162,71 @@ EOF
systemctl start testservice-34-check-writable.service
}
test_check_idmapped_mounts() {
rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
[Unit]
Description=Check id-mapped directories when DynamicUser=yes with StateDirectory
[Service]
# Relevant only for sanitizer runs
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
Type=oneshot
MountAPIVFS=yes
DynamicUser=yes
PrivateUsers=yes
TemporaryFileSystem=/run /var/opt /var/lib /vol
UMask=0000
StateDirectory=testidmapped:sampleservice
ExecStart=/bin/bash -c ' \
set -eux; \
set -o pipefail; \
touch /var/lib/sampleservice/testfile; \
[[ $(awk "NR==2 {print \$1}" /proc/self/uid_map) == $(stat -c "%%u" /var/lib/private/testidmapped/testfile) ]]; \
'
EOF
systemctl daemon-reload
systemctl start testservice-34-check-idmapped.service
[[ $(stat -c "%u" /var/lib/private/testidmapped/testfile) == 65534 ]]
}
test_check_idmapped_mounts_root() {
rm -rf /var/lib/testidmapped /var/lib/private/testidmapped
cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF
[Unit]
Description=Check id-mapped directories when DynamicUser=no with StateDirectory
[Service]
# Relevant only for sanitizer runs
EnvironmentFile=-/usr/lib/systemd/systemd-asan-env
Type=oneshot
MountAPIVFS=yes
User=root
DynamicUser=no
PrivateUsers=no
TemporaryFileSystem=/run /var/opt /var/lib /vol
UMask=0000
StateDirectory=testidmapped:sampleservice
ExecStart=/bin/bash -c ' \
set -eux; \
set -o pipefail; \
touch /var/lib/sampleservice/testfile; \
[[ 0 == $(stat -c "%%u" /var/lib/testidmapped/testfile) ]]; \
'
EOF
systemctl daemon-reload
systemctl start testservice-34-check-idmapped.service
[[ $(stat -c "%u" /var/lib/testidmapped/testfile) == 0 ]]
}
test_directory "StateDirectory" "/var/lib"
test_directory "RuntimeDirectory" "/run"
test_directory "CacheDirectory" "/var/cache"
@ -169,6 +234,11 @@ test_directory "LogsDirectory" "/var/log"
test_check_writable
if systemd-analyze compare-versions "$(uname -r)" ge 5.12; then
test_check_idmapped_mounts
test_check_idmapped_mounts_root
fi
systemd-analyze log-level info
touch /testok