mirror of
https://github.com/systemd/systemd.git
synced 2025-01-26 10:24:17 +08:00
core: add RootImage= setting for using a specific image file as root directory for a service
This is similar to RootDirectory= but mounts the root file system from a block device or loopback file instead of another directory. This reuses the image dissector code now used by nspawn and gpt-auto-discovery.
This commit is contained in:
parent
2eedfd2d8b
commit
915e6d1676
@ -86,12 +86,10 @@
|
||||
<para>A few execution parameters result in additional, automatic
|
||||
dependencies to be added.</para>
|
||||
|
||||
<para>Units with <varname>WorkingDirectory=</varname> or
|
||||
<varname>RootDirectory=</varname> set automatically gain
|
||||
dependencies of type <varname>Requires=</varname> and
|
||||
<varname>After=</varname> on all mount units required to access
|
||||
the specified paths. This is equivalent to having them listed
|
||||
explicitly in <varname>RequiresMountsFor=</varname>.</para>
|
||||
<para>Units with <varname>WorkingDirectory=</varname>, <varname>RootDirectory=</varname> or
|
||||
<varname>RootImage=</varname> set automatically gain dependencies of type <varname>Requires=</varname> and
|
||||
<varname>After=</varname> on all mount units required to access the specified paths. This is equivalent to having
|
||||
them listed explicitly in <varname>RequiresMountsFor=</varname>.</para>
|
||||
|
||||
<para>Similarly, units with <varname>PrivateTmp=</varname> enabled automatically get mount unit dependencies for all
|
||||
mounts required to access <filename>/tmp</filename> and <filename>/var/tmp</filename>. They will also gain an
|
||||
@ -117,9 +115,10 @@
|
||||
<varname>User=</varname> is used. If not set, defaults to the root directory when systemd is running as a
|
||||
system instance and the respective user's home directory if run as user. If the setting is prefixed with the
|
||||
<literal>-</literal> character, a missing working directory is not considered fatal. If
|
||||
<varname>RootDirectory=</varname> is not set, then <varname>WorkingDirectory=</varname> is relative to the root
|
||||
of the system running the service manager. Note that setting this parameter might result in additional
|
||||
dependencies to be added to the unit (see above).</para></listitem>
|
||||
<varname>RootDirectory=</varname>/<varname>RootImage=</varname> is not set, then
|
||||
<varname>WorkingDirectory=</varname> is relative to the root of the system running the service manager. Note
|
||||
that setting this parameter might result in additional dependencies to be added to the unit (see
|
||||
above).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
@ -136,13 +135,24 @@
|
||||
in conjunction with <varname>RootDirectory=</varname>. For details, see below.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RootImage=</varname></term>
|
||||
<listitem><para>Takes a path to a block device node or regular file as argument. This setting is similar to
|
||||
<varname>RootDirectory=</varname>, but mounts a file system hierarchy from a block device node or loopback
|
||||
file instead of a directory. The device node or file system image file needs to contain a file system without a
|
||||
partition table, or a file system within an MBR/MS-DOS or GPT partition table with only a single
|
||||
Linux-compatible partition, or a set of file systems within a GPT partition table that follows the <ulink
|
||||
url="http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/">Discoverable Partitions
|
||||
Specification</ulink>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MountAPIVFS=</varname></term>
|
||||
|
||||
<listitem><para>Takes a boolean argument. If on, a private mount namespace for the unit's processes is created
|
||||
and the API file systems <filename>/proc</filename>, <filename>/sys</filename> and <filename>/dev</filename>
|
||||
will be mounted inside of it, unless they are already mounted. Note that this option has no effect unless used
|
||||
in conjunction with <varname>RootDirectory=</varname> as these three mounts are generally mounted in the host
|
||||
in conjunction with <varname>RootDirectory=</varname>/<varname>RootImage=</varname> as these three mounts are generally mounted in the host
|
||||
anyway, and unless the root directory is changed the private mount namespace will be a 1:1 copy of the host's,
|
||||
and include these three mounts. Note that the <filename>/dev</filename> file system of the host is bind mounted
|
||||
if this option is used without <varname>PrivateDevices=</varname>. To run the service with a private, minimal
|
||||
@ -952,7 +962,7 @@
|
||||
access a process might have to the file system hierarchy. Each setting takes a space-separated list of paths
|
||||
relative to the host's root directory (i.e. the system running the service manager). Note that if paths
|
||||
contain symlinks, they are resolved relative to the root directory set with
|
||||
<varname>RootDirectory=</varname>.</para>
|
||||
<varname>RootDirectory=</varname>/<varname>RootImage=</varname>.</para>
|
||||
|
||||
<para>Paths listed in <varname>ReadWritePaths=</varname> are accessible from within the namespace with the same
|
||||
access modes as from outside of it. Paths listed in <varname>ReadOnlyPaths=</varname> are accessible for
|
||||
@ -971,9 +981,10 @@
|
||||
<para>Paths in <varname>ReadWritePaths=</varname>, <varname>ReadOnlyPaths=</varname> and
|
||||
<varname>InaccessiblePaths=</varname> may be prefixed with <literal>-</literal>, in which case they will be
|
||||
ignored when they do not exist. If prefixed with <literal>+</literal> the paths are taken relative to the root
|
||||
directory of the unit, as configured with <varname>RootDirectory=</varname>, instead of relative to the root
|
||||
directory of the host (see above). When combining <literal>-</literal> and <literal>+</literal> on the same
|
||||
path make sure to specify <literal>-</literal> first, and <literal>+</literal> second.</para>
|
||||
directory of the unit, as configured with <varname>RootDirectory=</varname>/<varname>RootImage=</varname>,
|
||||
instead of relative to the root directory of the host (see above). When combining <literal>-</literal> and
|
||||
<literal>+</literal> on the same path make sure to specify <literal>-</literal> first, and <literal>+</literal>
|
||||
second.</para>
|
||||
|
||||
<para>Note that using this setting will disconnect propagation of mounts from the service to the host
|
||||
(propagation in the opposite direction continues to work). This means that this setting may not be used for
|
||||
@ -1004,9 +1015,9 @@
|
||||
that in this case both read-only and regular bind mounts are reset, regardless which of the two settings is
|
||||
used.</para>
|
||||
|
||||
<para>This option is particularly useful when <varname>RootDirectory=</varname> is used. In this case the
|
||||
source path refers to a path on the host file system, while the destination path refers to a path below the
|
||||
root directory of the unit.</para></listitem>
|
||||
<para>This option is particularly useful when <varname>RootDirectory=</varname>/<varname>RootImage=</varname>
|
||||
is used. In this case the source path refers to a path on the host file system, while the destination path
|
||||
refers to a path below the root directory of the unit.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
@ -1094,10 +1105,10 @@
|
||||
such as <varname>CapabilityBoundingSet=</varname> will affect only the latter, and there's no way to acquire
|
||||
additional capabilities in the host's user namespace. Defaults to off.</para>
|
||||
|
||||
<para>This setting is particularly useful in conjunction with <varname>RootDirectory=</varname>, as the need to
|
||||
synchronize the user and group databases in the root directory and on the host is reduced, as the only users
|
||||
and groups who need to be matched are <literal>root</literal>, <literal>nobody</literal> and the unit's own
|
||||
user and group.</para></listitem>
|
||||
<para>This setting is particularly useful in conjunction with
|
||||
<varname>RootDirectory=</varname>/<varname>RootImage=</varname>, as the need to synchronize the user and group
|
||||
databases in the root directory and on the host is reduced, as the only users and groups who need to be matched
|
||||
are <literal>root</literal>, <literal>nobody</literal> and the unit's own user and group.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -758,6 +758,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("LimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("WorkingDirectory", "s", property_get_working_directory, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(ExecContext, root_directory), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RootImage", "s", NULL, offsetof(ExecContext, root_image), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("IOScheduling", "i", property_get_ioprio, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
@ -1048,7 +1049,7 @@ int bus_exec_context_set_transient_property(
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (STR_IN_SET(name, "TTYPath", "RootDirectory")) {
|
||||
} else if (STR_IN_SET(name, "TTYPath", "RootDirectory", "RootImage")) {
|
||||
const char *s;
|
||||
|
||||
r = sd_bus_message_read(message, "s", &s);
|
||||
@ -1061,6 +1062,8 @@ int bus_exec_context_set_transient_property(
|
||||
if (mode != UNIT_CHECK) {
|
||||
if (streq(name, "TTYPath"))
|
||||
r = free_and_strdup(&c->tty_path, s);
|
||||
else if (streq(name, "RootImage"))
|
||||
r = free_and_strdup(&c->root_image, s);
|
||||
else {
|
||||
assert(streq(name, "RootDirectory"));
|
||||
r = free_and_strdup(&c->root_directory, s);
|
||||
|
@ -1640,6 +1640,9 @@ static bool exec_needs_mount_namespace(
|
||||
assert(context);
|
||||
assert(params);
|
||||
|
||||
if (context->root_image)
|
||||
return true;
|
||||
|
||||
if (!strv_isempty(context->read_write_paths) ||
|
||||
!strv_isempty(context->read_only_paths) ||
|
||||
!strv_isempty(context->inaccessible_paths))
|
||||
@ -1938,7 +1941,7 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
|
||||
int r;
|
||||
_cleanup_strv_free_ char **rw = NULL;
|
||||
char *tmp = NULL, *var = NULL;
|
||||
const char *root_dir = NULL;
|
||||
const char *root_dir = NULL, *root_image = NULL;
|
||||
NameSpaceInfo ns_info = {
|
||||
.ignore_protect_paths = false,
|
||||
.private_dev = context->private_devices,
|
||||
@ -1965,8 +1968,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (params->flags & EXEC_APPLY_CHROOT)
|
||||
root_dir = context->root_directory;
|
||||
if (params->flags & EXEC_APPLY_CHROOT) {
|
||||
root_image = context->root_image;
|
||||
|
||||
if (!root_image)
|
||||
root_dir = context->root_directory;
|
||||
}
|
||||
|
||||
/*
|
||||
* If DynamicUser=no and RootDirectory= is set then let's pass a relaxed
|
||||
@ -1976,7 +1983,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
|
||||
if (!context->dynamic_user && root_dir)
|
||||
ns_info.ignore_protect_paths = true;
|
||||
|
||||
r = setup_namespace(root_dir, &ns_info, rw,
|
||||
r = setup_namespace(root_dir, root_image,
|
||||
&ns_info, rw,
|
||||
context->read_only_paths,
|
||||
context->inaccessible_paths,
|
||||
context->bind_mounts,
|
||||
@ -1985,7 +1993,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
|
||||
var,
|
||||
context->protect_home,
|
||||
context->protect_system,
|
||||
context->mount_flags);
|
||||
context->mount_flags,
|
||||
DISSECT_IMAGE_DISCARD_ON_LOOP);
|
||||
|
||||
/* If we couldn't set up the namespace this is probably due to a
|
||||
* missing capability. In this case, silently proceed. */
|
||||
@ -1999,10 +2008,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int apply_working_directory(const ExecContext *context,
|
||||
const ExecParameters *params,
|
||||
const char *home,
|
||||
const bool needs_mount_ns) {
|
||||
static int apply_working_directory(
|
||||
const ExecContext *context,
|
||||
const ExecParameters *params,
|
||||
const char *home,
|
||||
const bool needs_mount_ns) {
|
||||
|
||||
const char *d;
|
||||
const char *wd;
|
||||
|
||||
@ -2983,6 +2994,7 @@ void exec_context_done(ExecContext *c) {
|
||||
|
||||
c->working_directory = mfree(c->working_directory);
|
||||
c->root_directory = mfree(c->root_directory);
|
||||
c->root_image = mfree(c->root_image);
|
||||
c->tty_path = mfree(c->tty_path);
|
||||
c->syslog_identifier = mfree(c->syslog_identifier);
|
||||
c->user = mfree(c->user);
|
||||
@ -3320,6 +3332,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
|
||||
prefix, yes_no(c->memory_deny_write_execute),
|
||||
prefix, yes_no(c->restrict_realtime));
|
||||
|
||||
if (c->root_image)
|
||||
fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
|
||||
|
||||
STRV_FOREACH(e, c->environment)
|
||||
fprintf(f, "%sEnvironment: %s\n", prefix, *e);
|
||||
|
||||
|
@ -106,7 +106,7 @@ struct ExecContext {
|
||||
char **pass_environment;
|
||||
|
||||
struct rlimit *rlimit[_RLIMIT_MAX];
|
||||
char *working_directory, *root_directory;
|
||||
char *working_directory, *root_directory, *root_image;
|
||||
bool working_directory_missing_ok;
|
||||
bool working_directory_home;
|
||||
|
||||
|
@ -19,6 +19,7 @@ m4_dnl Define the context options only once
|
||||
m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
|
||||
`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
|
||||
$1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory)
|
||||
$1.RootImage, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_image)
|
||||
$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user)
|
||||
$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group)
|
||||
$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups)
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "dev-setup.h"
|
||||
#include "fd-util.h"
|
||||
#include "fs-util.h"
|
||||
#include "loop-util.h"
|
||||
#include "loopback-setup.h"
|
||||
#include "missing.h"
|
||||
#include "mkdir.h"
|
||||
@ -867,6 +868,7 @@ static unsigned namespace_calculate_mounts(
|
||||
|
||||
int setup_namespace(
|
||||
const char* root_directory,
|
||||
const char* root_image,
|
||||
const NameSpaceInfo *ns_info,
|
||||
char** read_write_paths,
|
||||
char** read_only_paths,
|
||||
@ -877,16 +879,46 @@ int setup_namespace(
|
||||
const char* var_tmp_dir,
|
||||
ProtectHome protect_home,
|
||||
ProtectSystem protect_system,
|
||||
unsigned long mount_flags) {
|
||||
unsigned long mount_flags,
|
||||
DissectImageFlags dissect_image_flags) {
|
||||
|
||||
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
|
||||
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
|
||||
MountEntry *m, *mounts = NULL;
|
||||
bool make_slave = false;
|
||||
unsigned n_mounts;
|
||||
int r = 0;
|
||||
|
||||
assert(ns_info);
|
||||
|
||||
if (mount_flags == 0)
|
||||
mount_flags = MS_SHARED;
|
||||
|
||||
if (root_image) {
|
||||
dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT;
|
||||
|
||||
if (protect_system == PROTECT_SYSTEM_STRICT && strv_isempty(read_write_paths))
|
||||
dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
|
||||
|
||||
r = loop_device_make_by_path(root_image,
|
||||
dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR,
|
||||
&loop_device);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = dissect_image(loop_device->fd, NULL, 0, dissect_image_flags, &dissected_image);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!root_directory) {
|
||||
/* Create a mount point for the image, if it's still missing. We use the same mount point for
|
||||
* all images, which is safe, since they all live in their own namespaces after all, and hence
|
||||
* won't see each other. */
|
||||
root_directory = "/run/systemd/unit-root";
|
||||
(void) mkdir(root_directory, 0700);
|
||||
}
|
||||
}
|
||||
|
||||
n_mounts = namespace_calculate_mounts(
|
||||
ns_info,
|
||||
read_write_paths,
|
||||
@ -1001,7 +1033,15 @@ int setup_namespace(
|
||||
}
|
||||
}
|
||||
|
||||
if (root_directory) {
|
||||
if (root_image) {
|
||||
r = dissected_image_mount(dissected_image, root_directory, dissect_image_flags);
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
loop_device_relinquish(loop_device);
|
||||
|
||||
} else if (root_directory) {
|
||||
|
||||
/* Turn directory into bind mount, if it isn't one yet */
|
||||
r = path_is_mount_point(root_directory, NULL, AT_SYMLINK_FOLLOW);
|
||||
if (r < 0)
|
||||
|
@ -25,6 +25,7 @@ typedef struct BindMount BindMount;
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "dissect-image.h"
|
||||
#include "macro.h"
|
||||
|
||||
typedef enum ProtectHome {
|
||||
@ -63,6 +64,7 @@ struct BindMount {
|
||||
|
||||
int setup_namespace(
|
||||
const char *root_directory,
|
||||
const char *root_image,
|
||||
const NameSpaceInfo *ns_info,
|
||||
char **read_write_paths,
|
||||
char **read_only_paths,
|
||||
@ -73,7 +75,8 @@ int setup_namespace(
|
||||
const char *var_tmp_dir,
|
||||
ProtectHome protect_home,
|
||||
ProtectSystem protect_system,
|
||||
unsigned long mount_flags);
|
||||
unsigned long mount_flags,
|
||||
DissectImageFlags dissected_image_flags);
|
||||
|
||||
int setup_tmp_dirs(
|
||||
const char *id,
|
||||
|
@ -862,6 +862,12 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
|
||||
return r;
|
||||
}
|
||||
|
||||
if (c->root_image) {
|
||||
r = unit_require_mounts_for(u, c->root_image);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (!MANAGER_IS_SYSTEM(u->manager))
|
||||
return 0;
|
||||
|
||||
|
@ -266,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||
"StandardInput", "StandardOutput", "StandardError",
|
||||
"Description", "Slice", "Type", "WorkingDirectory",
|
||||
"RootDirectory", "SyslogIdentifier", "ProtectSystem",
|
||||
"ProtectHome", "SELinuxContext", "Restart"))
|
||||
"ProtectHome", "SELinuxContext", "Restart", "RootImage"))
|
||||
r = sd_bus_message_append(m, "v", "s", eq);
|
||||
|
||||
else if (streq(field, "SyslogLevel")) {
|
||||
|
@ -77,6 +77,7 @@ int main(int argc, char *argv[]) {
|
||||
log_info("Not chrooted");
|
||||
|
||||
r = setup_namespace(root_directory,
|
||||
NULL,
|
||||
&ns_info,
|
||||
(char **) writable,
|
||||
(char **) readonly,
|
||||
@ -86,6 +87,7 @@ int main(int argc, char *argv[]) {
|
||||
var_tmp_dir,
|
||||
PROTECT_HOME_NO,
|
||||
PROTECT_SYSTEM_NO,
|
||||
0,
|
||||
0);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to setup namespace: %m");
|
||||
|
Loading…
Reference in New Issue
Block a user