From eae5127246b380bac9fedffeca8966d18d2b3344 Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 5 Aug 2024 23:43:29 +0000 Subject: [PATCH] core: add id-mapped mount support for Exec directories --- man/systemd.exec.xml | 7 +++ src/core/exec-invoke.c | 65 +++++++++++++++++++--- src/core/execute.h | 1 + src/core/namespace.c | 45 +++++++++++++++ src/core/namespace.h | 3 + src/shared/dissect-image.c | 2 +- src/shared/mount-util.c | 7 ++- src/shared/mount-util.h | 2 +- test/units/TEST-34-DYNAMICUSERMIGRATE.sh | 70 ++++++++++++++++++++++++ 9 files changed, 189 insertions(+), 13 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index d5f85ed85c7..30a926c9a08 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1476,6 +1476,13 @@ CapabilityBoundingSet=~CAP_B CAP_C below the locations defined in the following table. Also, the corresponding environment variable will be defined with the full paths of the directories. If multiple directories are set, then in the environment variable the paths are concatenated with colon (:). + + If DynamicUser= is used, and if the kernel version supports + id-mapped mounts, the specified directories will + be owned by "nobody" in the host namespace and will be mapped to (and will be owned by) the service's + UID/GID in its own namespace. For backward compatibility, existing directories created without id-mapped + mounts will be kept untouched. + Automatic directory creation and environment variables diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 968e9bd7c4d..4e70c843746 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -2554,7 +2554,8 @@ static int setup_exec_directory( /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the * current UID/GID ownership.) */ - r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID); + const char *target_dir = pp ?: p; + r = chmod_and_chown(target_dir, context->directories[type].mode, UID_INVALID, GID_INVALID); if (r < 0) goto fail; @@ -2563,12 +2564,51 @@ static int setup_exec_directory( if (params->runtime_scope != RUNTIME_SCOPE_SYSTEM) continue; - /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we + int idmapping_supported = is_idmapping_supported(target_dir); + if (idmapping_supported < 0) { + r = log_debug_errno(idmapping_supported, "Unable to determine if ID mapping is supported on mount '%s': %m", target_dir); + goto fail; + } + + log_debug("ID-mapping is%ssupported for exec directory %s", idmapping_supported ? " " : " not ", target_dir); + + /* Change the ownership of the whole tree, if necessary. When dynamic users are used we * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID * assignments to exist. */ - r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW); - if (r < 0) - goto fail; + uid_t chown_uid = uid; + gid_t chown_gid = gid; + bool do_chown = false; + + if (uid == 0 || gid == 0 || !idmapping_supported) { + do_chown = true; + i->idmapped = false; + } else { + /* Use 'nobody' uid/gid for exec directories if ID-mapping is supported. For backward compatibility, + * continue doing chmod/chown if the directory was chmod/chowned before (if uid/gid is not 'nobody') */ + struct stat st; + r = RET_NERRNO(stat(target_dir, &st)); + if (r < 0) + goto fail; + + if (st.st_uid == UID_NOBODY && st.st_gid == GID_NOBODY) { + do_chown = false; + i->idmapped = true; + } else if (exec_directory_is_private(context, type) && st.st_uid == 0 && st.st_gid == 0) { + chown_uid = UID_NOBODY; + chown_gid = GID_NOBODY; + do_chown = true; + i->idmapped = true; + } else { + do_chown = true; + i->idmapped = false; + } + } + + if (do_chown) { + r = path_chown_recursive(target_dir, chown_uid, chown_gid, context->dynamic_user ? 01777 : 07777, AT_SYMLINK_FOLLOW); + if (r < 0) + goto fail; + } } /* If we are not going to run in a namespace, set up the symlinks - otherwise @@ -2620,6 +2660,8 @@ static int setup_smack( static int compile_bind_mounts( const ExecContext *context, const ExecParameters *params, + uid_t exec_directory_uid, /* only used for id-mapped mounts Exec directories */ + gid_t exec_directory_gid, /* only used for id-mapped mounts Exec directories */ BindMount **ret_bind_mounts, size_t *ret_n_bind_mounts, char ***ret_empty_directories) { @@ -2721,6 +2763,9 @@ static int compile_bind_mounts( .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */ .recursive = true, .read_only = FLAGS_SET(i->flags, EXEC_DIRECTORY_READ_ONLY), + .idmapped = i->idmapped, + .uid = exec_directory_uid, + .gid = exec_directory_gid, }; } } @@ -3057,7 +3102,9 @@ static int apply_mount_namespace( ExecRuntime *runtime, const char *memory_pressure_path, bool needs_sandboxing, - char **reterr_path) { + char **reterr_path, + uid_t exec_directory_uid, + gid_t exec_directory_gid) { _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL, @@ -3095,7 +3142,7 @@ static int apply_mount_namespace( return r; } - r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories); + r = compile_bind_mounts(context, params, exec_directory_uid, exec_directory_gid, &bind_mounts, &n_bind_mounts, &empty_directories); if (r < 0) return r; @@ -4886,7 +4933,9 @@ int exec_invoke( runtime, memory_pressure_path, needs_sandboxing, - &error_path); + &error_path, + uid, + gid); if (r < 0) { *exit_status = EXIT_NAMESPACE; return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m", diff --git a/src/core/execute.h b/src/core/execute.h index 2f5abc9785b..7274c68d3d8 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -163,6 +163,7 @@ typedef struct ExecDirectoryItem { char *path; char **symlinks; ExecDirectoryFlags flags; + bool idmapped; } ExecDirectoryItem; typedef struct ExecDirectory { diff --git a/src/core/namespace.c b/src/core/namespace.c index a0d3dc0cbb9..91c905f2fec 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -113,6 +113,9 @@ typedef struct MountEntry { LIST_HEAD(MountOptions, image_options_const); char **overlay_layers; VeritySettings verity; + bool idmapped; + uid_t idmap_uid; + gid_t idmap_gid; } MountEntry; typedef struct MountList { @@ -467,6 +470,9 @@ static int append_bind_mounts(MountList *ml, const BindMount *binds, size_t n) { .flags = b->nodev ? MS_NODEV : 0, .source_const = b->source, .ignore = b->ignore_enoent, + .idmapped = b->idmapped, + .idmap_uid = b->uid, + .idmap_gid = b->gid, }; } @@ -1892,6 +1898,45 @@ static int apply_one_mount( } log_debug("Successfully mounted %s to %s", what, mount_entry_path(m)); + + /* Take care of id-mapped mounts */ + if (m->idmapped && uid_is_valid(m->idmap_uid) && gid_is_valid(m->idmap_gid)) { + _cleanup_close_ int userns_fd = -EBADF; + _cleanup_free_ char *uid_map = NULL, *gid_map = NULL; + + log_debug("Setting an id-mapped mount on %s", mount_entry_path(m)); + + /* Do mapping from nobody (in setup_exec_directory()) -> this uid */ + if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_NOBODY, (uid_t)m->idmap_uid, (uid_t)1u) < 0) + return log_oom(); + + /* Consider StateDirectory=xxx aaa xxx:aaa/222 + * To allow for later symlink creation (by root) in create_symlinks_from_tuples(), map root as well. */ + if (m->idmap_uid != (uid_t)0) { + if (strextendf(&uid_map, UID_FMT " " UID_FMT " " UID_FMT "\n", (uid_t)0, (uid_t)0, (uid_t)1u) < 0) + return log_oom(); + } + + if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", GID_NOBODY, (gid_t)m->idmap_gid, (gid_t)1u) < 0) + return log_oom(); + + if (m->idmap_gid != (gid_t)0) { + if (strextendf(&gid_map, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t)0, (gid_t)0, (gid_t)1u) < 0) + return log_oom(); + } + + userns_fd = userns_acquire(uid_map, gid_map); + if (userns_fd < 0) + return log_error_errno(userns_fd, "Failed to allocate user namespace: %m"); + + /* Drop SUID, add NOEXEC for the mount to avoid root exploits */ + r = remount_idmap_fd(STRV_MAKE(mount_entry_path(m)), userns_fd, MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV); + if (r < 0) + return log_error_errno(r, "Failed to create an id-mapped mount: %m"); + + log_debug("ID-mapped mount created successfully for %s from " UID_FMT " to " UID_FMT "", mount_entry_path(m), UID_NOBODY, m->idmap_uid); + } + return 1; } diff --git a/src/core/namespace.h b/src/core/namespace.h index fa4f7c7cf67..7b6e892cc29 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -87,6 +87,9 @@ struct BindMount { bool noexec; bool recursive; bool ignore_enoent; + bool idmapped; + uid_t uid; + gid_t gid; }; struct TemporaryFileSystem { diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 86fd1ef865d..f12a93d0dcf 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -2089,7 +2089,7 @@ static int mount_partition( (void) fs_grow(node, -EBADF, p); if (userns_fd >= 0) { - r = remount_idmap_fd(STRV_MAKE(p), userns_fd); + r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0); if (r < 0) return r; } diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 20b402e3d33..3c89a18790c 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -1386,7 +1386,8 @@ int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest int remount_idmap_fd( char **paths, - int userns_fd) { + int userns_fd, + uint64_t extra_mount_attr_set) { int r; @@ -1423,7 +1424,7 @@ int remount_idmap_fd( /* Set the user namespace mapping attribute on the cloned mount point */ if (mount_setattr(mntfd, "", AT_EMPTY_PATH, &(struct mount_attr) { - .attr_set = MOUNT_ATTR_IDMAP, + .attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set, .userns_fd = userns_fd, }, sizeof(struct mount_attr)) < 0) return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]); @@ -1460,7 +1461,7 @@ int remount_idmap( if (userns_fd < 0) return userns_fd; - return remount_idmap_fd(p, userns_fd); + return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0); } static void sub_mount_clear(SubMount *s) { diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 765dbf72f88..eb068d5b44f 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -163,7 +163,7 @@ typedef enum RemountIdmapping { } RemountIdmapping; int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping); -int remount_idmap_fd(char **p, int userns_fd); +int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set); int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping); int bind_mount_submounts( diff --git a/test/units/TEST-34-DYNAMICUSERMIGRATE.sh b/test/units/TEST-34-DYNAMICUSERMIGRATE.sh index d6bdfb07416..e5aee475af7 100755 --- a/test/units/TEST-34-DYNAMICUSERMIGRATE.sh +++ b/test/units/TEST-34-DYNAMICUSERMIGRATE.sh @@ -162,6 +162,71 @@ EOF systemctl start testservice-34-check-writable.service } +test_check_idmapped_mounts() { + rm -rf /var/lib/testidmapped /var/lib/private/testidmapped + + cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF +[Unit] +Description=Check id-mapped directories when DynamicUser=yes with StateDirectory + +[Service] +# Relevant only for sanitizer runs +EnvironmentFile=-/usr/lib/systemd/systemd-asan-env +Type=oneshot + +MountAPIVFS=yes +DynamicUser=yes +PrivateUsers=yes +TemporaryFileSystem=/run /var/opt /var/lib /vol +UMask=0000 +StateDirectory=testidmapped:sampleservice +ExecStart=/bin/bash -c ' \ + set -eux; \ + set -o pipefail; \ + touch /var/lib/sampleservice/testfile; \ + [[ $(awk "NR==2 {print \$1}" /proc/self/uid_map) == $(stat -c "%%u" /var/lib/private/testidmapped/testfile) ]]; \ +' +EOF + + systemctl daemon-reload + systemctl start testservice-34-check-idmapped.service + + [[ $(stat -c "%u" /var/lib/private/testidmapped/testfile) == 65534 ]] +} + +test_check_idmapped_mounts_root() { + rm -rf /var/lib/testidmapped /var/lib/private/testidmapped + + cat >/run/systemd/system/testservice-34-check-idmapped.service <<\EOF +[Unit] +Description=Check id-mapped directories when DynamicUser=no with StateDirectory + +[Service] +# Relevant only for sanitizer runs +EnvironmentFile=-/usr/lib/systemd/systemd-asan-env +Type=oneshot + +MountAPIVFS=yes +User=root +DynamicUser=no +PrivateUsers=no +TemporaryFileSystem=/run /var/opt /var/lib /vol +UMask=0000 +StateDirectory=testidmapped:sampleservice +ExecStart=/bin/bash -c ' \ + set -eux; \ + set -o pipefail; \ + touch /var/lib/sampleservice/testfile; \ + [[ 0 == $(stat -c "%%u" /var/lib/testidmapped/testfile) ]]; \ +' +EOF + + systemctl daemon-reload + systemctl start testservice-34-check-idmapped.service + + [[ $(stat -c "%u" /var/lib/testidmapped/testfile) == 0 ]] +} + test_directory "StateDirectory" "/var/lib" test_directory "RuntimeDirectory" "/run" test_directory "CacheDirectory" "/var/cache" @@ -169,6 +234,11 @@ test_directory "LogsDirectory" "/var/log" test_check_writable +if systemd-analyze compare-versions "$(uname -r)" ge 5.12; then + test_check_idmapped_mounts + test_check_idmapped_mounts_root +fi + systemd-analyze log-level info touch /testok