mirror of
https://github.com/systemd/systemd.git
synced 2024-11-24 10:43:35 +08:00
namespace: don't try to remount superblocks
We can't remount the underlying superblocks if we are inside a user
namespace and running Linux <= 4.17. In that case we can only change the
per-mount flags (MS_REMOUNT | MS_BIND).
This type of mount() call can only change the per-mount flags, so we
don't have to worry about passing the right string options now.
Fixes #9914 ("Since 1beab8b was merged, systemd has been failing to start
systemd-resolved inside unprivileged containers" ... "Failed to re-mount
'/run/systemd/unit-root/dev' read-only: Operation not permitted").
> It's basically my fault :-). I pointed out we could remount read-only
> without MS_BIND when reviewing the PR that added TemporaryFilesystem=,
> and poettering suggested to change PrivateDevices= at the same time.
> I think it's safe to change back, and I don't expect anyone will notice
> a difference in behaviour.
>
> It just surprised me to realize that
> `TemporaryFilesystem=/tmp:size=10M,ro,nosuid` would not apply `ro` to the
> superblock (underlying filesystem), like mount -osize=10M,ro,nosuid does.
> Maybe a comment could note the kernel version (v4.18), that lets you
> remount without MS_BIND inside a user namespace.
This makes the code longer and I guess this function is still ugly, sorry.
One obstacle to cleaning it up is the interaction between
`PrivateDevices=yes` and `ReadOnlyPaths=/dev`. I've added a test for the
existing behaviour, which I think is now the correct behaviour.
This commit is contained in:
parent
e094eb1188
commit
69338c3dfb
@ -1026,6 +1026,15 @@ static int apply_mount(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Change the per-mount readonly flag on an existing mount.
 *
 * Calls mount() on "path" with MS_REMOUNT | MS_BIND | MS_RDONLY or'ed together with "orig_flags"; the
 * source, filesystem type and data arguments are all NULL. Because MS_BIND is combined with MS_REMOUNT,
 * only the per-mount flags are changed and the underlying superblock is not remounted, so no string
 * mount options have to be passed along.
 *
 * Returns 0 on success, or the negated errno value if mount() fails. */
|
||||
static int remount_bind_readonly(const char *path, unsigned long orig_flags) {
|
||||
int r;
|
||||
|
||||
r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL);
|
||||
|
||||
return r < 0 ? -errno : 0;
|
||||
}
|
||||
|
||||
static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) {
|
||||
bool submounts = false;
|
||||
int r = 0;
|
||||
@ -1035,17 +1044,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self
|
||||
|
||||
if (mount_entry_read_only(m)) {
|
||||
if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
|
||||
/* Make superblock readonly */
|
||||
if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0)
|
||||
r = -errno;
|
||||
r = remount_bind_readonly(mount_entry_path(m), m->flags);
|
||||
} else {
|
||||
submounts = true;
|
||||
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
|
||||
}
|
||||
} else if (m->mode == PRIVATE_DEV) {
|
||||
/* Superblock can be readonly but the submounts can't */
|
||||
if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
|
||||
r = -errno;
|
||||
/* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag.
|
||||
* We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */
|
||||
r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS);
|
||||
} else
|
||||
return 0;
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
Description=Test for ReadOnlyPaths=
|
||||
|
||||
[Service]
|
||||
ReadOnlyPaths=/etc -/i-dont-exist /usr
|
||||
ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var'
|
||||
ReadOnlyPaths=/usr /etc /sys /dev -/i-dont-exist
|
||||
PrivateDevices=yes
|
||||
ExecStart=/bin/sh -x -c 'test ! -w /usr && test ! -w /etc && test ! -w /sys && test ! -w /sys/fs/cgroup'
|
||||
ExecStart=/bin/sh -x -c 'test ! -w /dev && test ! -w /dev/shm && test ! -e /i-dont-exist && test -w /var'
|
||||
Type=oneshot
|
||||
|
@ -5,11 +5,10 @@ Description=Test for TemporaryFileSystem with mount options
|
||||
Type=oneshot
|
||||
|
||||
# The mount options default to "mode=0755,nodev,strictatime".
|
||||
# Let's override some of them, and test the behaviour of "ro".
|
||||
# Let's override some of them, and test "ro".
|
||||
TemporaryFileSystem=/var:ro,mode=0700,nostrictatime
|
||||
|
||||
# Check /proc/self/mountinfo
|
||||
ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'
|
||||
ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)mode=700(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'
|
||||
|
||||
ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$6 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'
|
||||
|
Loading…
Reference in New Issue
Block a user