mountfsd: add new systemd-mountfsd component

This commit is contained in:
Lennart Poettering 2023-03-09 12:27:29 +01:00
parent 54452c7b2a
commit 702a52f4b5
19 changed files with 1407 additions and 1 deletions

View File

@ -676,3 +676,21 @@ Tools using the Varlink protocol (such as `varlinkctl`) or sd-bus (such as
service. Takes a file system path: if specified the tool will listen on an
`AF_UNIX` stream socket on the specified path in addition to whatever else it
would listen on.
`systemd-mountfsd`:
* `$SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES` takes a boolean argument. If true
disk images from the usual disk image directories (`/var/lib/machines/`,
`/var/lib/confexts/`, …) will be considered "trusted", i.e. are validated
with a more relaxed image policy (typically not requiring Verity signature
checking) than those from other directories (where Verity signature checks
are mandatory). If false all images are treated the same, regardless if
placed in the usual disk image directories or elsewhere. If not set defaults
to a compile time setting.
* `$SYSTEMD_MOUNTFSD_IMAGE_POLICY_TRUSTED`,
`$SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED` – the default image policies to
apply to trusted and untrusted disk images, respectively. An image is
considered trusted if it is placed in a trusted disk image directory (see
above), or if suitable polkit authentication was acquired. See
`systemd.image-policy(7)` for the valid syntax for image policy strings.

View File

@ -999,6 +999,7 @@ manpages = [
['systemd-measure', '1', [], 'HAVE_TPM2 HAVE_BLKID HAVE_OPENSSL'],
['systemd-modules-load.service', '8', ['systemd-modules-load'], 'HAVE_KMOD'],
['systemd-mount', '1', ['systemd-umount'], ''],
['systemd-mountfsd.service', '8', ['systemd-mountfsd'], 'ENABLE_MOUNTFSD'],
['systemd-network-generator.service', '8', ['systemd-network-generator'], ''],
['systemd-networkd-wait-online.service',
'8',

View File

@ -0,0 +1,70 @@
<?xml version='1.0'?> <!--*-nxml-*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<!-- SPDX-License-Identifier: LGPL-2.1-or-later -->
<refentry id="systemd-mountfsd.service" conditional='ENABLE_MOUNTFSD'>
<refentryinfo>
<title>systemd-mountfsd.service</title>
<productname>systemd</productname>
</refentryinfo>
<refmeta>
<refentrytitle>systemd-mountfsd.service</refentrytitle>
<manvolnum>8</manvolnum>
</refmeta>
<refnamediv>
<refname>systemd-mountfsd.service</refname>
<refname>systemd-mountfsd</refname>
<refpurpose>Disk Image File System Mount Service</refpurpose>
</refnamediv>
<refsynopsisdiv>
<para><filename>systemd-mountfsd.service</filename></para>
<para><filename>/usr/lib/systemd/systemd-mountfsd</filename></para>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><command>systemd-mountfsd</command> is a system service that dissects disk images, and returns mount
file descriptors for the file systems contained therein to clients, via a Varlink IPC API.</para>
<para>The disk images provided must contain a raw file system image or must follow the <ulink
url="https://uapi-group.org/specifications/specs/discoverable_partitions_specification/">Discoverable
Partitions Specification</ulink>. Before mounting any file systems authenticity of the disk image is
established in one or a combination of the following ways:</para>
<orderedlist>
<listitem><para>If the disk image is located in a regular file in one of the directories
<filename>/var/lib/machines/</filename>, <filename>/var/lib/portables/</filename>,
<filename>/var/lib/extensions/</filename>, <filename>/var/lib/confexts/</filename> or their
counterparts in the <filename>/etc/</filename>, <filename>/run/</filename> and
<filename>/usr/lib/</filename> hierarchies, it is assumed to be trusted.</para></listitem>
<listitem><para>If the disk image contains a Verity enabled file system, along with a signature
partition whose signature was made with a key in the kernel keyring or in
<filename>/etc/verity.d/</filename> (and related directories), the disk image is considered
trusted.</para></listitem>
</orderedlist>
<para>This service provides one <ulink url="https://varlink.org/">Varlink</ulink> service:
<constant>io.systemd.MountFileSystem</constant>, which accepts a file descriptor to a regular file or
block device, and returns a number of <function>fsmount()</function> file descriptors, which the
client may then attach to a path of their choice.</para>
<para>The returned mounts are automatically allowlisted in the per-user-namespace allowlist maintained by
<citerefentry><refentrytitle>systemd-nsresourced.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
<para>The file systems are automatically fsck'ed before mounting.</para>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd-nsresourced.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>

View File

@ -272,6 +272,7 @@ conf.set_quoted('SYSTEMD_TEST_DATA', testdata_dir)
conf.set_quoted('SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH', bindir / 'systemd-tty-ask-password-agent')
conf.set_quoted('SYSTEMD_UPDATE_HELPER_PATH', libexecdir / 'systemd-update-helper')
conf.set_quoted('SYSTEMD_USERWORK_PATH', libexecdir / 'systemd-userwork')
conf.set_quoted('SYSTEMD_MOUNTWORK_PATH', libexecdir / 'systemd-mountwork')
conf.set_quoted('SYSTEMD_NSRESOURCEWORK_PATH', libexecdir / 'systemd-nsresourcework')
conf.set_quoted('SYSTEMD_VERITYSETUP_PATH', libexecdir / 'systemd-veritysetup')
conf.set_quoted('SYSTEM_CONFIG_UNIT_DIR', pkgsysconfdir / 'system')
@ -1598,6 +1599,8 @@ conf.set10('ENABLE_REMOTE', have)
feature = get_option('vmspawn').disable_auto_if(conf.get('BUILD_MODE_DEVELOPER') == 0)
conf.set10('ENABLE_VMSPAWN', feature.allowed())
conf.set10('DEFAULT_MOUNTFSD_TRUSTED_DIRECTORIES', get_option('default-mountfsd-trusted-directories'))
foreach term : ['analyze',
'backlight',
'binfmt',
@ -1618,6 +1621,7 @@ foreach term : ['analyze',
'localed',
'logind',
'machined',
'mountfsd',
'networkd',
'nscd',
'nsresourced',
@ -2276,6 +2280,7 @@ subdir('src/locale')
subdir('src/login')
subdir('src/machine')
subdir('src/machine-id-setup')
subdir('src/mountfsd')
subdir('src/modules-load')
subdir('src/mount')
subdir('src/network')

View File

@ -124,6 +124,8 @@ option('portabled', type : 'boolean',
description : 'install the systemd-portabled stack')
option('sysext', type : 'boolean',
description : 'install the systemd-sysext stack')
option('mountfsd', type : 'boolean',
description : 'install the systemd-mountfsd stack')
option('userdb', type : 'boolean',
description : 'install the systemd-userdbd stack')
option('homed', type : 'feature', deprecated : { 'true' : 'enabled', 'false' : 'disabled' },
@ -528,3 +530,6 @@ option('vmlinux-h', type : 'combo', choices : ['auto', 'provided', 'generated',
description : 'which vmlinux.h to use')
option('vmlinux-h-path', type : 'string', value : '',
description : 'path to vmlinux.h to use')
option('default-mountfsd-trusted-directories', type : 'boolean', value: false,
description : 'controls whether mountfsd should apply a relaxed policy on DDIs in system DDI directories')

View File

@ -23,9 +23,10 @@ enable systemd-homed.service
enable systemd-homed-activate.service
enable systemd-homed-firstboot.service
enable systemd-journald-audit.socket
enable systemd-mountfsd.socket
enable systemd-network-generator.service
enable systemd-networkd-wait-online.service
enable systemd-networkd.service
enable systemd-networkd-wait-online.service
enable systemd-nsresourced.socket
enable systemd-pstore.service
enable systemd-resolved.service

View File

@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?> <!--*-nxml-*-->
<!DOCTYPE policyconfig PUBLIC "-//freedesktop//DTD PolicyKit Policy Configuration 1.0//EN"
"https://www.freedesktop.org/standards/PolicyKit/1/policyconfig.dtd">
<!--
SPDX-License-Identifier: LGPL-2.1-or-later
This file is part of systemd.
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
-->
<policyconfig>
<vendor>The systemd Project</vendor>
<vendor_url>https://systemd.io</vendor_url>
<!-- Allow mounting DDIs into the host user namespace -->
<action id="io.systemd.mount-file-system.mount-image">
<!-- This action is generally checked first: we'll first try to mount the image with
signature checks on. If that fails, we'll retry with the untrusted action below. -->
<description gettext-domain="systemd">Allow mounting of file system image</description>
<message gettext-domain="systemd">Authentication is required for an application to mount a file system image.</message>
<defaults>
<allow_any>auth_admin_keep</allow_any>
<allow_inactive>auth_admin_keep</allow_inactive>
<allow_active>yes</allow_active>
</defaults>
</action>
<action id="io.systemd.mount-file-system.mount-untrusted-image">
<!-- If the image cannot be mounted via the regular action because it is not signed by a
recognized key, we'll try this action. -->
<description gettext-domain="systemd">Allow mounting of untrusted file system image</description>
<message gettext-domain="systemd">Authentication is required for an application to mount a cryptographically unsigned file system image or an image whose cryptographic signature is not recognized.</message>
<defaults>
<allow_any>auth_admin</allow_any>
<allow_inactive>auth_admin</allow_inactive>
<allow_active>auth_admin</allow_active>
</defaults>
<annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-image</annotate>
</action>
<!-- Allow mounting DDIs into a private user namespace -->
<action id="io.systemd.mount-file-system.mount-image-privately">
<!-- This is the regular action checked by systemd-mountwork when the client passed a user
namespace file descriptor along with the request, i.e. when the image is mounted for a
private user namespace rather than for the host user namespace. -->
<description gettext-domain="systemd">Allow private mounting of trusted file system image</description>
<message gettext-domain="systemd">Authentication is required for an application to privately mount a file system image or an image whose cryptographic signature is recognized.</message>
<defaults>
<allow_any>yes</allow_any>
<allow_inactive>yes</allow_inactive>
<allow_active>yes</allow_active>
</defaults>
</action>
<action id="io.systemd.mount-file-system.mount-untrusted-image-privately">
<!-- Private user namespace counterpart of the "mount-untrusted-image" action: systemd-mountwork
requests it as a second step, when dissecting the image was refused by the image policy
and this privilege has not been acquired yet. -->
<description gettext-domain="systemd">Allow private mounting of untrusted file system image</description>
<message gettext-domain="systemd">Authentication is required for an application to privately mount a cryptographically unsigned file system image or an image whose cryptographic signature is not recognized.</message>
<defaults>
<allow_any>auth_admin</allow_any>
<allow_inactive>auth_admin</allow_inactive>
<allow_active>auth_admin</allow_active>
</defaults>
<annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-image-privately</annotate>
</action>
</policyconfig>

28
src/mountfsd/meson.build Normal file
View File

@ -0,0 +1,28 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
# Sources of the worker binary (systemd-mountwork), which the manager forks off and executes
systemd_mountwork_sources = files(
'mountwork.c',
)
# Sources of the manager daemon (systemd-mountfsd)
systemd_mountfsd_sources = files(
'mountfsd.c',
'mountfsd-manager.c',
)
# Both executables are only built if ENABLE_MOUNTFSD is set, and are based on libexec_template
executables += [
libexec_template + {
'name' : 'systemd-mountfsd',
'conditions' : ['ENABLE_MOUNTFSD'],
'sources' : systemd_mountfsd_sources,
},
libexec_template + {
'name' : 'systemd-mountwork',
'conditions' : ['ENABLE_MOUNTFSD'],
'sources' : systemd_mountwork_sources,
'link_with' : common_libs,
'dependencies' : common_deps,
},
]
# NOTE(review): unlike the executables above, the polkit policy is installed without an
# ENABLE_MOUNTFSD condition — confirm this is intended.
install_data('io.systemd.mount-file-system.policy',
install_dir : polkitpolicydir)

View File

@ -0,0 +1,277 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <sys/wait.h>
#include "sd-daemon.h"
#include "build-path.h"
#include "common-signal.h"
#include "env-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "mkdir.h"
#include "mountfsd-manager.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "umask-util.h"
#define LISTEN_TIMEOUT_USEC (25 * USEC_PER_SEC)
static int start_workers(Manager *m, bool explicit_request);
/* Returns the number of worker processes currently tracked by the manager, i.e. the fixed and the dynamic
 * ones added up. */
static size_t manager_current_workers(Manager *m) {
        unsigned n_fixed, n_dynamic;

        assert(m);

        n_fixed = set_size(m->workers_fixed);
        n_dynamic = set_size(m->workers_dynamic);

        return n_fixed + n_dynamic;
}
/* Child event handler, invoked when one of the worker processes terminated. Removes the worker's event
 * source from the set that tracks it, logs how the worker ended and then tries to refill the worker pool.
 * Always returns 0. */
static int on_worker_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
        Manager *manager = ASSERT_PTR(userdata);
        bool was_dynamic, was_fixed;

        assert(s);

        /* The event source must have been tracked in exactly one of the two sets */
        was_dynamic = set_remove(manager->workers_dynamic, s);
        was_fixed = set_remove(manager->workers_fixed, s);
        assert_se(was_dynamic != was_fixed);

        sd_event_source_disable_unref(s);

        switch (si->si_code) {

        case CLD_EXITED:
                if (si->si_status == EXIT_SUCCESS)
                        log_debug("Worker " PID_FMT " exited successfully.", si->si_pid);
                else
                        log_warning("Worker " PID_FMT " died with a failure exit status %i, ignoring.", si->si_pid, si->si_status);
                break;

        case CLD_KILLED:
                log_warning("Worker " PID_FMT " was killed by signal %s, ignoring.", si->si_pid, signal_to_string(si->si_status));
                break;

        case CLD_DUMPED:
                log_warning("Worker " PID_FMT " dumped core by signal %s, ignoring.", si->si_pid, signal_to_string(si->si_status));
                break;

        default:
                log_warning("Got unexpected exit code via SIGCHLD, ignoring.");
        }

        /* Fill up workers again if we fell below the low watermark */
        (void) start_workers(manager, /* explicit_request= */ false);

        return 0;
}
/* SIGUSR2 handler: the workers use this signal to tell us there's more work to do. We react by adding one
 * more worker, as long as we are below the high watermark. Always returns 0. */
static int on_sigusr2(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        Manager *manager = ASSERT_PTR(userdata);

        (void) start_workers(manager, /* explicit_request= */ true);

        return 0;
}
/* Hash operations for the two worker sets (see set_ensure_put() in start_one_worker()): the keys are
 * sd_event_source pointers, hashed and compared via the trivial hash/compare functions, with
 * sd_event_source_disable_unref() registered as key destructor.
 * NOTE(review): presumably this means freeing a set also disables and releases any child event sources
 * still stored in it — confirm against the hashmap implementation. */
DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
event_source_hash_ops,
sd_event_source,
(void (*)(const sd_event_source*, struct siphash*)) trivial_hash_func,
(int (*)(const sd_event_source*, const sd_event_source*)) trivial_compare_func,
sd_event_source_disable_unref);
/* Allocates a new Manager object and sets up its event loop: exit on SIGTERM/SIGINT, the SIGRTMIN+18
 * handler, memory pressure and watchdog handling (both best-effort), and the SIGUSR2 handler the workers
 * use to request more workers.
 *
 * On success stores the new object in *ret and returns 0. On failure returns a negative errno-style error
 * and leaves *ret untouched. */
int manager_new(Manager **ret) {
        _cleanup_(manager_freep) Manager *m = NULL;
        int r;

        assert(ret);

        m = new(Manager, 1);
        if (!m)
                return -ENOMEM;

        *m = (Manager) {
                .listen_fd = -EBADF,
                .worker_ratelimit = {
                        .interval = 5 * USEC_PER_SEC,
                        .burst = 50,
                },
        };

        r = sd_event_new(&m->event);
        if (r < 0)
                return r;

        r = sd_event_set_signal_exit(m->event, true);
        if (r < 0)
                return r;

        r = sd_event_add_signal(m->event, NULL, (SIGRTMIN+18)|SD_EVENT_SIGNAL_PROCMASK, sigrtmin18_handler, NULL);
        if (r < 0)
                return r;

        /* The following two are nice-to-have only, hence failures are merely logged at debug level */
        r = sd_event_add_memory_pressure(m->event, NULL, NULL, NULL);
        if (r < 0)
                log_debug_errno(r, "Failed to allocate memory pressure event source, ignoring: %m");

        r = sd_event_set_watchdog(m->event, true);
        if (r < 0)
                log_debug_errno(r, "Failed to enable watchdog handling, ignoring: %m");

        r = sd_event_add_signal(m->event, NULL, SIGUSR2|SD_EVENT_SIGNAL_PROCMASK, on_sigusr2, m);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(m);
        return 0;
}
/* Releases all resources of the specified Manager object, including the listening socket. Accepts NULL, in
 * which case this is a NOP. Always returns NULL, so that the result can be assigned to the variable that
 * held the pointer. */
Manager* manager_free(Manager *m) {
        if (!m)
                return NULL;

        set_free(m->workers_fixed);
        set_free(m->workers_dynamic);

        /* Note: we rely on PR_DEATHSIG to kill the workers for us */

        /* Don't leak the listening socket: manager_startup() might have allocated it itself. (This is fine
         * if the fd was never set up, as it is initialized to -EBADF then.) */
        safe_close(m->listen_fd);

        sd_event_unref(m->event);

        return mfree(m);
}
/* Forks off one worker process and executes the systemd-mountwork binary in it. The listening socket is
 * passed to the worker as fd 3, announced via $LISTEN_PID/$LISTEN_FDS, and $MOUNTFS_FIXED_WORKER tells the
 * worker whether it is a "fixed" or a "dynamic" one. In the parent a child event source is allocated for the
 * worker and added to the matching worker set.
 *
 * Returns 0 on success, or a negative errno-style error (which has been logged already). */
static int start_one_worker(Manager *m) {
        _cleanup_(sd_event_source_disable_unrefp) sd_event_source *source = NULL;
        bool fixed;
        pid_t pid;
        int r;

        assert(m);

        /* As long as we have fewer than MOUNTFS_WORKERS_MIN fixed workers, the new worker becomes a fixed
         * one, otherwise a dynamic one. */
        fixed = set_size(m->workers_fixed) < MOUNTFS_WORKERS_MIN;

        r = safe_fork_full(
                        "(sd-worker)",
                        /* stdio_fds= */ NULL,
                        &m->listen_fd, 1,
                        FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_REOPEN_LOG|FORK_LOG|FORK_CLOSE_ALL_FDS,
                        &pid);
        if (r < 0)
                return log_error_errno(r, "Failed to fork new worker child: %m");
        if (r == 0) {
                char pids[DECIMAL_STR_MAX(pid_t)];
                /* Child */

                /* Make sure the listening socket ends up as fd 3, with O_CLOEXEC off, so that it survives
                 * the execve() below */
                if (m->listen_fd == 3) {
                        r = fd_cloexec(3, false);
                        if (r < 0) {
                                log_error_errno(r, "Failed to turn off O_CLOEXEC for fd 3: %m");
                                _exit(EXIT_FAILURE);
                        }
                } else {
                        if (dup2(m->listen_fd, 3) < 0) { /* dup2() creates with O_CLOEXEC off */
                                log_error_errno(errno, "Failed to move listen fd to 3: %m");
                                _exit(EXIT_FAILURE);
                        }

                        safe_close(m->listen_fd);
                }

                /* NOTE(review): this relies on safe_fork_full() storing the child's own PID in 'pid' on the
                 * child side — confirm against process-util. */
                xsprintf(pids, PID_FMT, pid);
                if (setenv("LISTEN_PID", pids, 1) < 0) {
                        log_error_errno(errno, "Failed to set $LISTEN_PID: %m");
                        _exit(EXIT_FAILURE);
                }

                if (setenv("LISTEN_FDS", "1", 1) < 0) {
                        log_error_errno(errno, "Failed to set $LISTEN_FDS: %m");
                        _exit(EXIT_FAILURE);
                }

                if (setenv("MOUNTFS_FIXED_WORKER", one_zero(fixed), 1) < 0) {
                        log_error_errno(errno, "Failed to set $MOUNTFS_FIXED_WORKER: %m");
                        _exit(EXIT_FAILURE);
                }

                r = setenv_systemd_log_level();
                if (r < 0) {
                        log_error_errno(r, "Failed to set $SYSTEMD_LOG_LEVEL: %m");
                        _exit(EXIT_FAILURE);
                }

                r = invoke_callout_binary(SYSTEMD_MOUNTWORK_PATH, STRV_MAKE("systemd-mountwork", "xxxxxxxxxxxxxxxx")); /* With some extra space rename_process() can make use of */
                log_error_errno(r, "Failed to start worker process: %m");
                _exit(EXIT_FAILURE);
        }

        /* Parent: watch the worker, and keep track of it in the right set */
        r = sd_event_add_child(m->event, &source, pid, WEXITED, on_worker_exit, m);
        if (r < 0)
                return log_error_errno(r, "Failed to watch child " PID_FMT ": %m", pid);

        r = set_ensure_put(
                        fixed ? &m->workers_fixed : &m->workers_dynamic,
                        &event_source_hash_ops,
                        source);
        if (r < 0)
                return log_error_errno(r, "Failed to add child process to set: %m");

        /* The set owns the event source now */
        TAKE_PTR(source);
        return 0;
}
/* Starts workers until at least MOUNTFS_WORKERS_MIN of them are running. If explicit_request is true one
 * additional worker is started on top, unless MOUNTFS_WORKERS_MAX is reached already. If workers are
 * started too frequently the event loop is asked to exit with a failure.
 *
 * Returns 0 on success, or a negative errno-style error. */
static int start_workers(Manager *m, bool explicit_request) {
        bool want_extra = explicit_request;
        int r;

        assert(m);

        for (;;) {
                size_t n_running = manager_current_workers(m);
                bool below_low_watermark, extra_permitted;

                log_debug("%zu workers running.", n_running);

                below_low_watermark = n_running < MOUNTFS_WORKERS_MIN;
                extra_permitted = want_extra && n_running < MOUNTFS_WORKERS_MAX;

                if (!below_low_watermark && !extra_permitted)
                        return 0;

                if (!ratelimit_below(&m->worker_ratelimit)) {
                        /* If we keep starting workers too often, let's fail the whole daemon, something is wrong */
                        sd_event_exit(m->event, EXIT_FAILURE);

                        return log_error_errno(SYNTHETIC_ERRNO(EUCLEAN), "Worker threads requested too frequently, something is wrong.");
                }

                r = start_one_worker(m);
                if (r < 0)
                        return r;

                /* An explicit request is good for a single extra worker only */
                want_extra = false;
        }
}
/* Sets up the listening socket and starts the initial set of workers. If a single socket was passed in via
 * the sd_listen_fds() protocol it is used, otherwise an AF_UNIX stream socket is allocated and bound to
 * /run/systemd/io.systemd.MountFileSystem.
 *
 * Returns 0 on success, or a negative errno-style error (which has been logged already). */
int manager_startup(Manager *m) {
        int n;

        assert(m);
        assert(m->listen_fd < 0);

        n = sd_listen_fds(false);
        if (n < 0)
                return log_error_errno(n, "Failed to determine number of passed file descriptors: %m");
        if (n > 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected one listening fd, got %i.", n);
        if (n == 1)
                m->listen_fd = SD_LISTEN_FDS_START;
        else {
                static const union sockaddr_union sockaddr = {
                        .un.sun_family = AF_UNIX,
                        .un.sun_path = "/run/systemd/io.systemd.MountFileSystem",
                };

                m->listen_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
                if (m->listen_fd < 0)
                        return log_error_errno(errno, "Failed to allocate socket: %m");

                /* Remove any stale socket inode left over from a previous run */
                (void) sockaddr_un_unlink(&sockaddr.un);

                /* Bind with an all-zero umask, so that the socket inode's access mode isn't restricted */
                WITH_UMASK(0000)
                        if (bind(m->listen_fd, &sockaddr.sa, SOCKADDR_UN_LEN(sockaddr.un)) < 0)
                                return log_error_errno(errno, "Failed to bind socket: %m");

                if (listen(m->listen_fd, SOMAXCONN) < 0)
                        return log_error_errno(errno, "Failed to listen on socket: %m");
        }

        /* Let's make sure every accept() call on this socket times out after 25s. This allows workers to be
         * GC'ed on idle */
        if (setsockopt(m->listen_fd, SOL_SOCKET, SO_RCVTIMEO, TIMEVAL_STORE(LISTEN_TIMEOUT_USEC), sizeof(struct timeval)) < 0)
                return log_error_errno(errno, "Failed to set SO_RCVTIMEO: %m");

        return start_workers(m, /* explicit_request= */ false);
}

View File

@ -0,0 +1,30 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include "sd-bus.h"
#include "sd-event.h"
typedef struct Manager Manager;
#include "hashmap.h"
#include "ratelimit.h"
#define MOUNTFS_WORKERS_MIN 3
#define MOUNTFS_WORKERS_MAX 4096
/* Runtime state of the mountfsd manager process, which owns the listening socket and supervises the pool of
 * worker processes. */
struct Manager {
/* Event loop that dispatches signals and worker exits */
sd_event *event;
/* Child event sources of the running workers, split up into "fixed" and "dynamic" ones */
Set *workers_fixed; /* Workers 0…MOUNTFS_WORKERS_MIN */
Set *workers_dynamic; /* Workers MOUNTFS_WORKERS_MIN+1…MOUNTFS_WORKERS_MAX */
/* Listening socket handed to each worker; initialized to -EBADF until manager_startup() sets it up */
int listen_fd;
/* Limits how frequently new workers may be started */
RateLimit worker_ratelimit;
};
int manager_new(Manager **ret);
Manager* manager_free(Manager *m);
DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
int manager_startup(Manager *m);

43
src/mountfsd/mountfsd.c Normal file
View File

@ -0,0 +1,43 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <sys/stat.h>
#include <sys/types.h>
#include "daemon-util.h"
#include "log.h"
#include "main-func.h"
#include "mountfsd-manager.h"
#include "signal-util.h"
static int run(int argc, char *argv[]) {
_unused_ _cleanup_(notify_on_cleanup) const char *notify_stop = NULL;
_cleanup_(manager_freep) Manager *m = NULL;
int r;
log_setup();
umask(0022);
if (argc != 1)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "This program takes no arguments.");
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD) >= 0);
r = manager_new(&m);
if (r < 0)
return log_error_errno(r, "Could not create manager: %m");
r = manager_startup(m);
if (r < 0)
return log_error_errno(r, "Failed to start up daemon: %m");
notify_stop = notify_start(NOTIFY_READY, NOTIFY_STOPPING);
r = sd_event_loop(m->event);
if (r < 0)
return log_error_errno(r, "Event loop failed: %m");
return 0;
}
DEFINE_MAIN_FUNCTION(run);

703
src/mountfsd/mountwork.c Normal file
View File

@ -0,0 +1,703 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "sd-daemon.h"
#include "argv-util.h"
#include "bus-polkit.h"
#include "chase.h"
#include "discover-image.h"
#include "dissect-image.h"
#include "env-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "io-util.h"
#include "main-func.h"
#include "missing_loop.h"
#include "namespace-util.h"
#include "nsresource.h"
#include "nulstr-util.h"
#include "os-util.h"
#include "process-util.h"
#include "stat-util.h"
#include "user-util.h"
#include "varlink.h"
#include "varlink-io.systemd.MountFileSystem.h"
#define ITERATIONS_MAX 64U
#define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE)
#define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC)
#define LISTEN_IDLE_USEC (90 * USEC_PER_SEC)
/* Built-in default image policy for images that are not considered trusted: the root and /usr partitions
 * must each be either signed or absent, all other partitions are ignored. Can be overridden via
 * $SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED, see determine_image_policy(). */
static const ImagePolicy image_policy_untrusted = {
.n_policies = 2,
.policies = {
{ PARTITION_ROOT, PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
{ PARTITION_USR, PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
},
.default_flags = PARTITION_POLICY_IGNORE,
};
/* JSON dispatch callback that parses an image policy string into an ImagePolicy object. 'userdata' must
 * point to an ImagePolicy* variable; whatever it held before is released. A JSON null resets the variable
 * to NULL. Returns 0 on success, or the result of json_log() on invalid input. */
static int json_dispatch_image_policy(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
        ImagePolicy **target = ASSERT_PTR(userdata);
        _cleanup_(image_policy_freep) ImagePolicy *parsed = NULL;
        int r;

        /* null means: drop whatever has been set so far */
        if (json_variant_is_null(variant)) {
                *target = image_policy_free(*target);
                return 0;
        }

        if (!json_variant_is_string(variant))
                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));

        r = image_policy_from_string(json_variant_string(variant), &parsed);
        if (r < 0)
                return json_log(variant, flags, r, "JSON field '%s' is not a valid image policy.", strna(name));

        /* Replace the old policy with the freshly parsed one */
        image_policy_free(*target);
        *target = TAKE_PTR(parsed);

        return 0;
}
/* Parameters of a MountImage() method call, as dispatched from the JSON parameters object. Release with
 * mount_image_parameters_done(). */
typedef struct MountImageParameters {
/* Indexes of the image and user namespace fds among the fds passed along with the method call; UINT_MAX
 * if not specified */
unsigned image_fd_idx;
unsigned userns_fd_idx;
/* Tristates: negative if not specified by the client */
int read_only;
int growfs;
/* Password supplied by the client; erased from memory when freed */
char *password;
/* Image policy requested by the client, NULL if none */
ImagePolicy *image_policy;
} MountImageParameters;
/* Releases the heap-allocated fields of a MountImageParameters structure and resets them to NULL. The
 * password is erased from memory before it is freed. */
static void mount_image_parameters_done(MountImageParameters *p) {
        assert(p);

        p->image_policy = image_policy_free(p->image_policy);
        p->password = erase_and_free(p->password);
}
/* Checks that the image fd passed by the client refers to a regular file, has only safe flags set and is
 * opened either read-only or read-write. If it is opened read-only, p->read_only is forced to true.
 *
 * Returns 0 on success, or a negative errno-style error (-EBADF for any other access mode). */
static int validate_image_fd(int fd, MountImageParameters *p) {
        int flags, access_mode, r;

        assert(fd >= 0);
        assert(p);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        flags = fd_verify_safe_flags(fd);
        if (flags < 0)
                return log_debug_errno(flags, "Image file descriptor has unsafe flags set: %m");

        access_mode = flags & O_ACCMODE;

        if (access_mode == O_RDONLY)
                /* A read-only fd can only ever be mounted read-only */
                p->read_only = true;
        else if (access_mode != O_RDWR)
                return -EBADF;

        return 0;
}
/* Determines whether the image file referenced by the specified fd is located immediately inside one of the
 * image search directories, and hence shall be considered trusted. The mechanism can be turned on and off
 * via $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES; if the variable is unset the compile-time default
 * DEFAULT_MOUNTFSD_TRUSTED_DIRECTORIES applies.
 *
 * Returns > 0 if the image is trusted, 0 if it is not (or the mechanism is disabled), and a negative
 * errno-style error on failure. */
static int verify_trusted_image_fd_by_path(int fd) {
        _cleanup_free_ char *p = NULL;
        struct stat sta;
        int r;

        assert(fd >= 0);

        r = secure_getenv_bool("SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES");
        if (r == -ENXIO) {
                /* Variable not set, use the compile-time default */
                if (!DEFAULT_MOUNTFSD_TRUSTED_DIRECTORIES) {
                        log_debug("Trusted directory mechanism disabled at compile time.");
                        return false;
                }
        } else if (r < 0) {
                log_debug_errno(r, "Failed to parse $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable, not trusting any image: %m");
                return false;
        } else if (!r) {
                log_debug("Trusted directory mechanism disabled via $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable.");
                return false;
        }

        r = fd_get_path(fd, &p);
        if (r < 0)
                return log_debug_errno(r, "Failed to get path of passed image file descriptor: %m");
        if (fstat(fd, &sta) < 0)
                return log_debug_errno(errno, "Failed to stat() passed image file descriptor: %m");

        log_debug("Checking if image '%s' is in trusted directories.", p);

        for (ImageClass c = 0; c < _IMAGE_CLASS_MAX; c++)
                NULSTR_FOREACH(s, image_search_path[c]) {
                        _cleanup_close_ int dir_fd = -EBADF, inode_fd = -EBADF;
                        _cleanup_free_ char *q = NULL;
                        struct stat stb;
                        const char *e;

                        r = chase(s, NULL, CHASE_SAFE, &q, &dir_fd);
                        if (r == -ENOENT)
                                continue;
                        if (r < 0) {
                                log_warning_errno(r, "Failed to resolve search path '%s', ignoring: %m", s);
                                continue;
                        }

                        /* Check that the inode refers to a file immediately inside the image directory,
                         * i.e. not the image directory itself, and nothing further down the tree */
                        e = path_startswith(p, q);
                        if (isempty(e))
                                continue;

                        e += strspn(e, "/");
                        if (!filename_is_valid(e))
                                continue;

                        /* The path reported for the fd is informational only, hence open the file by that
                         * name below the search directory and verify it is the very same inode. */
                        r = chaseat(dir_fd, e, CHASE_SAFE, NULL, &inode_fd);
                        if (r < 0)
                                return log_error_errno(r, "Couldn't verify that specified image '%s' is in search path '%s': %m", p, s);

                        if (fstat(inode_fd, &stb) < 0)
                                return log_error_errno(errno, "Failed to stat image file '%s/%s': %m", q, e);

                        if (stat_inode_same(&sta, &stb)) {
                                log_debug("Image '%s' is *in* trusted directories.", p);
                                return true; /* Yay */
                        }
                }

        log_debug("Image '%s' is *not* in trusted directories.", p);
        return false;
}
/* Calculates the image policy to dissect an image with: starts from the default policy for trusted or
 * untrusted images (either the built-in one, or the one configured via the matching environment variable)
 * and intersects it with the policy the client supplied.
 *
 * Returns the result of image_policy_intersect(), or a negative errno-style error if the environment
 * variable cannot be parsed. */
static int determine_image_policy(
                int image_fd,
                bool trusted,
                ImagePolicy *client_policy,
                ImagePolicy **ret) {

        _cleanup_(image_policy_freep) ImagePolicy *override_policy = NULL;
        const ImagePolicy *base_policy;
        const char *variable, *value;
        int r;

        assert(image_fd >= 0);
        assert(ret);

        variable = trusted ? "SYSTEMD_MOUNTFSD_IMAGE_POLICY_TRUSTED" : "SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED";
        base_policy = trusted ? &image_policy_allow : &image_policy_untrusted;

        /* A policy configured via the environment takes precedence over the built-in default */
        value = secure_getenv(variable);
        if (value) {
                r = image_policy_from_string(value, &override_policy);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse image policy supplied via $%s: %m", variable);

                base_policy = override_policy;
        }

        return image_policy_intersect(base_policy, client_policy, ret);
}
/* Validates the user namespace fd passed by the client, if any. If the fd refers to our own user namespace
 * it is closed and *userns_fd is reset, so that the request is handled as if no user namespace had been
 * specified.
 *
 * Returns 0 if the caller shall proceed, a negative errno-style error on failure, or the result of
 * varlink_error_invalid_parameter_name() if the fd does not refer to a user namespace. */
static int validate_userns(Varlink *link, int *userns_fd) {
        int r;

        assert(link);
        assert(userns_fd);

        /* No user namespace specified? Then there's nothing to validate */
        if (*userns_fd < 0)
                return 0;

        r = fd_verify_safe_flags(*userns_fd);
        if (r < 0)
                return log_debug_errno(r, "User namespace file descriptor has unsafe flags set: %m");

        r = fd_is_ns(*userns_fd, CLONE_NEWUSER);
        if (r < 0)
                return r;
        if (r == 0)
                return varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");

        /* Our own host user namespace? Then close the fd, and handle it as if none was specified. */
        r = is_our_namespace(*userns_fd, NAMESPACE_USER);
        if (r < 0)
                return log_debug_errno(r, "Failed to determine if user namespace provided by client is our own: %m");
        if (r > 0) {
                log_debug("User namespace provided by client is our own.");
                *userns_fd = safe_close(*userns_fd);
        }

        return 0;
}
static int vl_method_mount_image(
Varlink *link,
JsonVariant *parameters,
VarlinkMethodFlags flags,
void *userdata) {
static const JsonDispatch dispatch_table[] = {
{ "imageFileDescriptor", JSON_VARIANT_UNSIGNED, json_dispatch_uint, offsetof(MountImageParameters, image_fd_idx), JSON_MANDATORY },
{ "userNamespaceFileDescriptor", JSON_VARIANT_UNSIGNED, json_dispatch_uint, offsetof(MountImageParameters, userns_fd_idx), 0 },
{ "readOnly", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(MountImageParameters, read_only), 0 },
{ "growFileSystems", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(MountImageParameters, growfs), 0 },
{ "password", JSON_VARIANT_STRING, json_dispatch_string, offsetof(MountImageParameters, password), 0 },
{ "imagePolicy", JSON_VARIANT_STRING, json_dispatch_image_policy, offsetof(MountImageParameters, image_policy), 0 },
VARLINK_DISPATCH_POLKIT_FIELD,
{}
};
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_(mount_image_parameters_done) MountImageParameters p = {
.image_fd_idx = UINT_MAX,
.userns_fd_idx = UINT_MAX,
.read_only = -1,
.growfs = -1,
};
_cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
_cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
_cleanup_(json_variant_unrefp) JsonVariant *aj = NULL;
_cleanup_close_ int image_fd = -EBADF, userns_fd = -EBADF;
_cleanup_(image_policy_freep) ImagePolicy *use_policy = NULL;
Hashmap **polkit_registry = ASSERT_PTR(userdata);
_cleanup_free_ char *ps = NULL;
bool image_is_trusted = false;
uid_t peer_uid;
int r;
assert(link);
assert(parameters);
json_variant_sensitive(parameters); /* might contain passwords */
r = varlink_get_peer_uid(link, &peer_uid);
if (r < 0)
return log_debug_errno(r, "Failed to get client UID: %m");
r = varlink_dispatch(link, parameters, dispatch_table, &p);
if (r != 0)
return r;
if (p.image_fd_idx != UINT_MAX) {
image_fd = varlink_peek_dup_fd(link, p.image_fd_idx);
if (image_fd < 0)
return log_debug_errno(image_fd, "Failed to peek image fd from client: %m");
}
if (p.userns_fd_idx != UINT_MAX) {
userns_fd = varlink_peek_dup_fd(link, p.userns_fd_idx);
if (userns_fd < 0)
return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
}
r = validate_image_fd(image_fd, &p);
if (r < 0)
return r;
r = validate_userns(link, &userns_fd);
if (r != 0)
return r;
r = verify_trusted_image_fd_by_path(image_fd);
if (r < 0)
return r;
image_is_trusted = r;
const char *polkit_details[] = {
"read_only", one_zero(p.read_only > 0),
NULL,
};
const char *polkit_action, *polkit_untrusted_action;
PolkitFlags polkit_flags;
if (userns_fd < 0) {
/* Mount into the host user namespace */
polkit_action = "io.systemd.mount-file-system.mount-image";
polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image";
polkit_flags = 0;
} else {
/* Mount into a private user namespace */
polkit_action = "io.systemd.mount-file-system.mount-image-privately";
polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image-privately";
/* If polkit is not around, let's allow mounting authenticated images by default */
polkit_flags = POLKIT_DEFAULT_ALLOW;
}
/* Let's definitely acquire the regular action privilege, for mounting properly signed images */
r = varlink_verify_polkit_async_full(
link,
/* bus= */ NULL,
polkit_action,
polkit_details,
/* good_user= */ UID_INVALID,
polkit_flags,
polkit_registry);
if (r <= 0)
return r;
/* Generate the commmon dissection directory here. We are not going to use it, but the clients might,
* and they likely are unprivileged, hence cannot create it themselves. Hence let's jsut create it
* here, if it is missing. */
r = get_common_dissect_directory(NULL);
if (r < 0)
return r;
r = loop_device_make(
image_fd,
p.read_only == 0 ? O_RDONLY : O_RDWR,
0,
UINT64_MAX,
UINT32_MAX,
LO_FLAGS_PARTSCAN,
LOCK_EX,
&loop);
if (r < 0)
return r;
DissectImageFlags dissect_flags =
(p.read_only == 0 ? DISSECT_IMAGE_READ_ONLY : 0) |
(p.growfs != 0 ? DISSECT_IMAGE_GROWFS : 0) |
DISSECT_IMAGE_DISCARD_ANY |
DISSECT_IMAGE_FSCK |
DISSECT_IMAGE_ADD_PARTITION_DEVICES |
DISSECT_IMAGE_PIN_PARTITION_DEVICES |
DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
/* Let's see if we have acquired the privilege to mount untrusted images already */
bool polkit_have_untrusted_action =
varlink_has_polkit_action(link, polkit_untrusted_action, polkit_details, polkit_registry);
for (;;) {
use_policy = image_policy_free(use_policy);
ps = mfree(ps);
/* We use the image policy for trusted images if either the path is below a trusted
* directory, or if we have already acquired a PK authentication that tells us that untrusted
* images are OK */
bool use_trusted_policy =
image_is_trusted ||
polkit_have_untrusted_action;
r = determine_image_policy(
image_fd,
use_trusted_policy,
p.image_policy,
&use_policy);
if (r < 0)
return r;
r = image_policy_to_string(use_policy, /* simplify= */ true, &ps);
if (r < 0)
return r;
log_debug("Using image policy: %s", ps);
r = dissect_loop_device(
loop,
&verity,
/* mount_options= */ NULL,
use_policy,
dissect_flags,
&di);
if (r == -ENOPKG)
return varlink_error(link, "io.systemd.MountFileSystem.IncompatibleImage", NULL);
if (r == -ENOTUNIQ)
return varlink_error(link, "io.systemd.MountFileSystem.MultipleRootPartitionsFound", NULL);
if (r == -ENXIO)
return varlink_error(link, "io.systemd.MountFileSystem.RootPartitionNotFound", NULL);
if (r == -ERFKILL) {
/* The image policy refused this, let's retry after trying to get PolicyKit */
if (!polkit_have_untrusted_action) {
log_debug("Denied by image policy. Trying a stronger polkit authentication before continuing.");
r = varlink_verify_polkit_async_full(
link,
/* bus= */ NULL,
polkit_untrusted_action,
polkit_details,
/* good_user= */ UID_INVALID,
/* flags= */ 0, /* NB: the image cannot be authenticated, hence unless PK is around to allow this anyway, fail! */
polkit_registry);
if (r <= 0 && !ERRNO_IS_NEG_PRIVILEGE(r))
return r;
if (r > 0) {
/* Try again, now that we know the client has enough privileges. */
log_debug("Denied by image policy, retrying after polkit authentication.");
polkit_have_untrusted_action = true;
continue;
}
}
return varlink_error(link, "io.systemd.MountFileSystem.DeniedByImagePolicy", NULL);
}
if (r < 0)
return r;
/* Success */
break;
}
r = dissected_image_load_verity_sig_partition(
di,
loop->fd,
&verity);
if (r < 0)
return r;
r = dissected_image_decrypt(
di,
p.password,
&verity,
dissect_flags);
if (r == -ENOKEY) /* new dm-verity userspace returns ENOKEY if the dm-verity signature key is not in
* key chain. That's great. */
return varlink_error(link, "io.systemd.MountFileSystem.KeyNotFound", NULL);
if (r == -EBUSY) /* DM kernel subsystem is shit with returning useful errors hence we keep retrying
* under the assumption that some errors are transitional. Which the errors might
* not actually be. After all retries failed we return EBUSY. Let's turn that into a
* generic Verity error. It's not very helpful, could mean anything, but at least it
* gives client a clear idea that this has to do with Verity. */
return varlink_error(link, "io.systemd.MountFileSystem.VerityFailure", NULL);
if (r < 0)
return r;
r = dissected_image_mount(
di,
/* where= */ NULL,
/* uid_shift= */ UID_INVALID,
/* uid_range= */ UID_INVALID,
userns_fd,
dissect_flags);
if (r < 0)
return r;
for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
_cleanup_(json_variant_unrefp) JsonVariant *pj = NULL;
DissectedPartition *pp = di->partitions + d;
int fd_idx;
if (!pp->found)
continue;
if (pp->fsmount_fd < 0)
continue;
if (userns_fd >= 0) {
r = nsresource_add_mount(userns_fd, pp->fsmount_fd);
if (r < 0)
return r;
}
fd_idx = varlink_push_fd(link, pp->fsmount_fd);
if (fd_idx < 0)
return fd_idx;
TAKE_FD(pp->fsmount_fd);
r = json_build(&pj,
JSON_BUILD_OBJECT(
JSON_BUILD_PAIR("designator", JSON_BUILD_STRING(partition_designator_to_string(d))),
JSON_BUILD_PAIR("writable", JSON_BUILD_BOOLEAN(pp->rw)),
JSON_BUILD_PAIR("growFileSystem", JSON_BUILD_BOOLEAN(pp->growfs)),
JSON_BUILD_PAIR_CONDITION(pp->partno > 0, "partitionNumber", JSON_BUILD_INTEGER(pp->partno)),
JSON_BUILD_PAIR_CONDITION(pp->architecture > 0, "architecture", JSON_BUILD_STRING(architecture_to_string(pp->architecture))),
JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(pp->uuid), "partitionUuid", JSON_BUILD_UUID(pp->uuid)),
JSON_BUILD_PAIR("fileSystemType", JSON_BUILD_STRING(dissected_partition_fstype(pp))),
JSON_BUILD_PAIR_CONDITION(pp->label, "partitionLabel", JSON_BUILD_STRING(pp->label)),
JSON_BUILD_PAIR("size", JSON_BUILD_INTEGER(pp->size)),
JSON_BUILD_PAIR("offset", JSON_BUILD_INTEGER(pp->offset)),
JSON_BUILD_PAIR("mountFileDescriptor", JSON_BUILD_INTEGER(fd_idx))));
if (r < 0)
return r;
r = json_variant_append_array(&aj, pj);
if (r < 0)
return r;
}
loop_device_relinquish(loop);
r = varlink_replyb(link, JSON_BUILD_OBJECT(
JSON_BUILD_PAIR("partitions", JSON_BUILD_VARIANT(aj)),
JSON_BUILD_PAIR("imagePolicy", JSON_BUILD_STRING(ps)),
JSON_BUILD_PAIR("imageSize", JSON_BUILD_INTEGER(di->image_size)),
JSON_BUILD_PAIR("sectorSize", JSON_BUILD_INTEGER(di->sector_size)),
JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(di->image_uuid), "imageUuid", JSON_BUILD_UUID(di->image_uuid))));
if (r < 0)
return r;
return r;
}
/* Serves one client connection on a private, short-lived event loop.
 *
 * Always takes possession of _fd, whether or not the call succeeds. Returns 0 on success and a negative
 * errno-style value on failure.
 *
 * Once the server has been attached to the temporary event loop it is detached again on every path out
 * of this function. The caller keeps reusing the same VarlinkServer object for subsequent connections,
 * hence a failure while handling one connection must not leave the server bound to the event loop of
 * the previous connection. */
static int process_connection(VarlinkServer *server, int _fd) {
        _cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */
        _cleanup_(varlink_close_unrefp) Varlink *vl = NULL;
        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
        int r, q;

        r = sd_event_new(&event);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate event loop: %m");

        r = varlink_server_attach_event(server, event, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to attach Varlink server to event loop: %m");

        /* From here on the server is attached, hence leave via 'finish' only. */

        r = varlink_server_add_connection(server, fd, &vl);
        if (r < 0) {
                r = log_error_errno(r, "Failed to add connection: %m");
                goto finish;
        }

        TAKE_FD(fd);          /* the fd was handed over to the connection object, don't close it ourselves */
        vl = varlink_ref(vl); /* hold our own reference, released by the cleanup handler on return */

        r = varlink_set_allow_fd_passing_input(vl, true);
        if (r < 0) {
                r = log_error_errno(r, "Failed to enable fd passing for read: %m");
                goto finish;
        }

        r = varlink_set_allow_fd_passing_output(vl, true);
        if (r < 0) {
                r = log_error_errno(r, "Failed to enable fd passing for write: %m");
                goto finish;
        }

        r = sd_event_loop(event);
        if (r < 0) {
                r = log_error_errno(r, "Failed to run event loop: %m");
                goto finish;
        }

        r = 0; /* the event loop's exit code is of no interest to the caller */

finish:
        q = varlink_server_detach_event(server);
        if (q < 0) {
                q = log_error_errno(q, "Failed to detach Varlink server from event loop: %m");
                if (r >= 0) /* don't let the detach failure mask an earlier, more specific error */
                        r = q;
        }

        return r;
}
/* Entry point of the worker process.
 *
 * Expects exactly one listening socket from sd_listen_fds(), sets up a Varlink server implementing
 * io.systemd.MountFileSystem.MountImage, and then accepts and serves connections one at a time via
 * process_connection(). The worker exits voluntarily (returning 0) when it
 *
 *   - has gone through more than ITERATIONS_MAX loop iterations,
 *   - has been running for RUNTIME_MAX_USEC or longer, or
 *   - has been waiting for a connection for LISTEN_IDLE_USEC or longer (unless the
 *     $MOUNTFS_FIXED_WORKER environment variable is true, in which case there is no idle timeout).
 *
 * Returns a negative errno-style value on setup or accept() failures. */
static int run(int argc, char *argv[]) {
        usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY;
        _cleanup_(varlink_server_unrefp) VarlinkServer *server = NULL;
        _cleanup_(hashmap_freep) Hashmap *polkit_registry = NULL;
        _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
        unsigned n_iterations = 0;
        int m, listen_fd, r;

        log_setup();

        m = sd_listen_fds(false);
        if (m < 0)
                return log_error_errno(m, "Failed to determine number of listening fds: %m");
        if (m == 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No socket to listen on received.");
        if (m > 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Worker can only listen on a single socket at a time.");

        listen_fd = SD_LISTEN_FDS_START;

        /* We block in accept4() below, hence make sure the listening socket is in blocking mode */
        r = fd_nonblock(listen_fd, false);
        if (r < 0)
                return log_error_errno(r, "Failed to turn off non-blocking mode for listening socket: %m");

        r = varlink_server_new(&server, VARLINK_SERVER_INHERIT_USERDATA);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate server: %m");

        r = varlink_server_add_interface(server, &vl_interface_io_systemd_MountFileSystem);
        if (r < 0)
                return log_error_errno(r, "Failed to add MountFileSystem interface to varlink server: %m");

        r = varlink_server_bind_method_many(
                        server,
                        "io.systemd.MountFileSystem.MountImage",vl_method_mount_image);
        if (r < 0)
                return log_error_errno(r, "Failed to bind methods: %m");

        /* The polkit registry is handed to the method handlers via the server's userdata pointer */
        varlink_server_set_userdata(server, &polkit_registry);

        /* Makes the per-connection event loop in process_connection() terminate once the connection is done */
        r = varlink_server_set_exit_on_idle(server, true);
        if (r < 0)
                return log_error_errno(r, "Failed to enable exit-on-idle mode: %m");

        /* NB: the name in the error message must match the variable actually queried */
        r = getenv_bool("MOUNTFS_FIXED_WORKER");
        if (r < 0)
                return log_error_errno(r, "Failed to parse MOUNTFS_FIXED_WORKER: %m");
        listen_idle_usec = r ? USEC_INFINITY : LISTEN_IDLE_USEC;

        r = pidref_set_parent(&parent);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire pidfd of parent process: %m");

        start_time = now(CLOCK_MONOTONIC);

        for (;;) {
                _cleanup_close_ int fd = -EBADF;
                usec_t n;

                /* Exit the worker in regular intervals, to flush out all memory use */
                if (n_iterations++ > ITERATIONS_MAX) {
                        log_debug("Exiting worker, processed %u iterations, that's enough.", n_iterations);
                        break;
                }

                n = now(CLOCK_MONOTONIC);
                if (n >= usec_add(start_time, RUNTIME_MAX_USEC)) {
                        log_debug("Exiting worker, ran for %s, that's enough.",
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, start_time), 0));
                        break;
                }

                /* last_busy_usec is reset to USEC_INFINITY after each served connection; the first pass
                 * through here afterwards marks the beginning of the current idle period. */
                if (last_busy_usec == USEC_INFINITY)
                        last_busy_usec = n;
                else if (listen_idle_usec != USEC_INFINITY && n >= usec_add(last_busy_usec, listen_idle_usec)) {
                        log_debug("Exiting worker, been idle for %s.",
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, last_busy_usec), 0));
                        break;
                }

                (void) rename_process("systemd-mountwork: waiting...");
                fd = RET_NERRNO(accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC));
                (void) rename_process("systemd-mountwork: processing...");

                if (fd == -EAGAIN)
                        continue; /* The listening socket has SO_RECVTIMEO set, hence a timeout is expected
                                   * after a while, let's check if it's time to exit though. */
                if (fd == -EINTR)
                        continue; /* Might be that somebody attached via strace, let's just continue in that
                                   * case */
                if (fd < 0)
                        return log_error_errno(fd, "Failed to accept() from listening socket: %m");

                if (now(CLOCK_MONOTONIC) <= usec_add(n, PRESSURE_SLEEP_TIME_USEC)) {
                        /* We only slept a very short time? If so, let's see if there are more sockets
                         * pending, and if so, let's ask our parent for more workers */

                        r = fd_wait_for_event(listen_fd, POLLIN, 0);
                        if (r < 0)
                                return log_error_errno(r, "Failed to test for POLLIN on listening socket: %m");

                        if (FLAGS_SET(r, POLLIN)) {
                                r = pidref_kill(&parent, SIGUSR2);
                                if (r == -ESRCH)
                                        return log_error_errno(r, "Parent already died?");
                                if (r < 0)
                                        return log_error_errno(r, "Failed to send SIGUSR2 signal to parent. %m");
                        }
                }

                /* Failures while serving a single connection are logged by process_connection() and
                 * deliberately do not terminate the worker. */
                (void) process_connection(server, TAKE_FD(fd));
                last_busy_usec = USEC_INFINITY;
        }

        return 0;
}

DEFINE_MAIN_FUNCTION(run);

View File

@ -181,6 +181,7 @@ shared_sources = files(
'varlink-io.systemd.Hostname.c',
'varlink-io.systemd.Journal.c',
'varlink-io.systemd.ManagedOOM.c',
'varlink-io.systemd.MountFileSystem.c',
'varlink-io.systemd.NamespaceResource.c',
'varlink-io.systemd.Network.c',
'varlink-io.systemd.PCRExtend.c',

View File

@ -0,0 +1,69 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include "varlink-io.systemd.MountFileSystem.h"
/* Varlink IDL enum type "PartitionDesignator": the partition roles known to this interface.
 *
 * Note that the PartitionInfo structure below declares its "designator" field as a plain VARLINK_STRING
 * rather than as a reference to this type; the type is nonetheless exported as part of the interface
 * definition at the end of this file. */
static VARLINK_DEFINE_ENUM_TYPE(
PartitionDesignator,
VARLINK_DEFINE_ENUM_VALUE(root),
VARLINK_DEFINE_ENUM_VALUE(usr),
VARLINK_DEFINE_ENUM_VALUE(home),
VARLINK_DEFINE_ENUM_VALUE(srv),
VARLINK_DEFINE_ENUM_VALUE(esp),
VARLINK_DEFINE_ENUM_VALUE(xbootldr),
VARLINK_DEFINE_ENUM_VALUE(swap),
VARLINK_DEFINE_ENUM_VALUE(root_verity),
VARLINK_DEFINE_ENUM_VALUE(usr_verity),
VARLINK_DEFINE_ENUM_VALUE(root_verity_sig),
VARLINK_DEFINE_ENUM_VALUE(usr_verity_sig),
VARLINK_DEFINE_ENUM_VALUE(tmp),
VARLINK_DEFINE_ENUM_VALUE(var));
/* Varlink IDL structure "PartitionInfo": describes one partition of a mounted image. An array of
 * these is returned in the "partitions" output field of MountImage().
 *
 * Mandatory fields: designator, writable, growFileSystem, fileSystemType, size, offset and
 * mountFileDescriptor. The fields marked VARLINK_NULLABLE (partitionNumber, architecture, partitionUuid,
 * partitionLabel) may be absent from a reply.
 *
 * NOTE(review): mountFileDescriptor is presumably an index into the file descriptors passed along with
 * the reply message rather than a raw fd number, and size/offset are presumably in bytes — confirm
 * against the method implementation. */
static VARLINK_DEFINE_STRUCT_TYPE(
PartitionInfo,
VARLINK_DEFINE_FIELD(designator, VARLINK_STRING, 0),
VARLINK_DEFINE_FIELD(writable, VARLINK_BOOL, 0),
VARLINK_DEFINE_FIELD(growFileSystem, VARLINK_BOOL, 0),
VARLINK_DEFINE_FIELD(partitionNumber, VARLINK_INT, VARLINK_NULLABLE),
VARLINK_DEFINE_FIELD(architecture, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_FIELD(partitionUuid, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_FIELD(fileSystemType, VARLINK_STRING, 0),
VARLINK_DEFINE_FIELD(partitionLabel, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_FIELD(size, VARLINK_INT, 0),
VARLINK_DEFINE_FIELD(offset, VARLINK_INT, 0),
VARLINK_DEFINE_FIELD(mountFileDescriptor, VARLINK_INT, 0));
/* Varlink IDL method "MountImage".
 *
 * Inputs: imageFileDescriptor is the only mandatory parameter; userNamespaceFileDescriptor, readOnly,
 * growFileSystems, password, imagePolicy and allowInteractiveAuthentication are all optional
 * (VARLINK_NULLABLE).
 *
 * Outputs: an array of PartitionInfo structures ("partitions"), plus the mandatory imagePolicy,
 * imageSize and sectorSize fields, and the optional imageName and imageUuid fields.
 *
 * NOTE(review): the two *FileDescriptor inputs are presumably indexes into the file descriptors passed
 * along with the call message, and imagePolicy presumably uses the systemd.image-policy(7) string
 * syntax — confirm against the method implementation. */
static VARLINK_DEFINE_METHOD(
MountImage,
VARLINK_DEFINE_INPUT(imageFileDescriptor, VARLINK_INT, 0),
VARLINK_DEFINE_INPUT(userNamespaceFileDescriptor, VARLINK_INT, VARLINK_NULLABLE),
VARLINK_DEFINE_INPUT(readOnly, VARLINK_BOOL, VARLINK_NULLABLE),
VARLINK_DEFINE_INPUT(growFileSystems, VARLINK_BOOL, VARLINK_NULLABLE),
VARLINK_DEFINE_INPUT(password, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_INPUT(imagePolicy, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_INPUT(allowInteractiveAuthentication, VARLINK_BOOL, VARLINK_NULLABLE),
VARLINK_DEFINE_OUTPUT_BY_TYPE(partitions, PartitionInfo, VARLINK_ARRAY),
VARLINK_DEFINE_OUTPUT(imagePolicy, VARLINK_STRING, 0),
VARLINK_DEFINE_OUTPUT(imageSize, VARLINK_INT, 0),
VARLINK_DEFINE_OUTPUT(sectorSize, VARLINK_INT, 0),
VARLINK_DEFINE_OUTPUT(imageName, VARLINK_STRING, VARLINK_NULLABLE),
VARLINK_DEFINE_OUTPUT(imageUuid, VARLINK_STRING, VARLINK_NULLABLE));
/* Varlink errors of the io.systemd.MountFileSystem interface. None of them declares any parameters.
 *
 * NOTE(review): judging by the names these cover image dissection failures (IncompatibleImage,
 * MultipleRootPartitionsFound, RootPartitionNotFound), image policy refusal (DeniedByImagePolicy) and
 * Verity setup problems (KeyNotFound, VerityFailure) — confirm the exact conditions against the method
 * implementation. */
static VARLINK_DEFINE_ERROR(IncompatibleImage);
static VARLINK_DEFINE_ERROR(MultipleRootPartitionsFound);
static VARLINK_DEFINE_ERROR(RootPartitionNotFound);
static VARLINK_DEFINE_ERROR(DeniedByImagePolicy);
static VARLINK_DEFINE_ERROR(KeyNotFound);
static VARLINK_DEFINE_ERROR(VerityFailure);
/* The "io.systemd.MountFileSystem" interface definition, tying together the types, the MountImage
 * method and the errors defined above. Unlike those, this symbol is not static: it is the one object
 * this file exports (as vl_interface_io_systemd_MountFileSystem, per the accompanying header). */
VARLINK_DEFINE_INTERFACE(
io_systemd_MountFileSystem,
"io.systemd.MountFileSystem",
&vl_type_PartitionDesignator,
&vl_type_PartitionInfo,
&vl_method_MountImage,
&vl_error_IncompatibleImage,
&vl_error_MultipleRootPartitionsFound,
&vl_error_RootPartitionNotFound,
&vl_error_DeniedByImagePolicy,
&vl_error_KeyNotFound,
&vl_error_VerityFailure);

View File

@ -0,0 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include "varlink-idl.h"
/* Varlink interface description for "io.systemd.MountFileSystem". */
extern const VarlinkInterface vl_interface_io_systemd_MountFileSystem;

View File

@ -12,6 +12,7 @@
#include "varlink-io.systemd.Credentials.h"
#include "varlink-io.systemd.Journal.h"
#include "varlink-io.systemd.ManagedOOM.h"
#include "varlink-io.systemd.MountFileSystem.h"
#include "varlink-io.systemd.NamespaceResource.h"
#include "varlink-io.systemd.Network.h"
#include "varlink-io.systemd.PCRExtend.h"
@ -140,6 +141,8 @@ TEST(parse_format) {
print_separator();
test_parse_format_one(&vl_interface_io_systemd_ManagedOOM);
print_separator();
test_parse_format_one(&vl_interface_io_systemd_MountFileSystem);
print_separator();
test_parse_format_one(&vl_interface_io_systemd_Network);
print_separator();
test_parse_format_one(&vl_interface_io_systemd_oom);

View File

@ -732,6 +732,14 @@ units = [
'file' : 'systemd-userdbd.socket',
'conditions' : ['ENABLE_USERDB'],
},
{
'file' : 'systemd-mountfsd.service.in',
'conditions' : ['ENABLE_MOUNTFSD'],
},
{
'file' : 'systemd-mountfsd.socket',
'conditions' : ['ENABLE_MOUNTFSD'],
},
{
'file' : 'systemd-nsresourced.service.in',
'conditions' : ['ENABLE_NSRESOURCED'],

View File

@ -0,0 +1,46 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# This file is part of systemd.
#
# systemd is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
[Unit]
Description=DDI File System Mounter
Documentation=man:systemd-mountfsd.service(8)
# Socket-activated: this service requires, and is ordered after, its own listening socket unit.
Requires=systemd-mountfsd.socket
After=systemd-mountfsd.socket
# No default dependencies, hence ordering and shutdown conflict are spelled out explicitly.
Conflicts=shutdown.target
Before=sysinit.target shutdown.target
DefaultDependencies=no
[Service]
# NOTE(review): the capability bounding set below is commented out, i.e. not in effect — confirm whether
# this is intentional or a leftover.
#CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_SYS_RESOURCE CAP_BPF CAP_PERFMON CAP_SETGID CAP_SETUID
ExecStart={{LIBEXECDIR}}/systemd-mountfsd
# Sandboxing settings follow.
IPAddressDeny=any
LimitNOFILE={{HIGH_RLIMIT_NOFILE}}
LockPersonality=yes
MemoryDenyWriteExecute=yes
NoNewPrivileges=yes
ProtectProc=invisible
ProtectControlGroups=yes
ProtectHome=yes
ProtectHostname=yes
ProtectKernelLogs=yes
ProtectKernelModules=yes
ProtectSystem=strict
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
RestrictRealtime=yes
RestrictSUIDSGID=yes
SystemCallArchitectures=native
SystemCallErrorNumber=EPERM
# The @mount system call group is allowed in addition to the regular service set.
SystemCallFilter=@system-service @mount
Type=notify
# NOTE(review): NotifyAccess=all presumably exists so that worker processes, not just the main process,
# may send notification messages — confirm.
NotifyAccess=all
FileDescriptorStoreMax=4096
{{SERVICE_WATCHDOG}}
[Install]
Also=systemd-mountfsd.socket

View File

@ -0,0 +1,22 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# This file is part of systemd.
#
# systemd is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
[Unit]
Description=DDI File System Mounter Socket
Documentation=man:systemd-mountfsd.service(8)
DefaultDependencies=no
Conflicts=shutdown.target
Before=sockets.target shutdown.target
[Socket]
# Varlink entry point of the io.systemd.MountFileSystem interface.
ListenStream=/run/systemd/io.systemd.MountFileSystem
# Mode 0666: any local user may connect.
# NOTE(review): authorization is presumably enforced per request by the service (polkit) — confirm.
SocketMode=0666
[Install]
WantedBy=sockets.target