diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index cd45ceb2a16..3dbdf376d3c 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -522,6 +522,23 @@ . + + + + Takes the path to a file representing a kernel + network namespace that the container shall run in. The specified path + should refer to a (possibly bind-mounted) network namespace file, as + exposed by the kernel below /proc/$PID/ns/net. + This makes the container enter the given network namespace. One of the + typical use cases is to give a network namespace under + /run/netns created by ip-netns8, + for example, . + Note that this option cannot be used together with other + network-related options, such as + or . + + diff --git a/src/basic/missing.h b/src/basic/missing.h index bbdded984fb..790f9f55a59 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -1271,4 +1271,12 @@ struct fib_rule_uid_range { # define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) #endif +#ifndef NSFS_MAGIC +#define NSFS_MAGIC 0x6e736673 +#endif + +#ifndef NS_GET_NSTYPE +#define NS_GET_NSTYPE _IO(0xb7, 0x3) +#endif + #include "missing_syscall.h" diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c index c6b8507e9d8..96fc8b37872 100644 --- a/src/basic/stat-util.c +++ b/src/basic/stat-util.c @@ -226,6 +226,18 @@ int fd_is_temporary_fs(int fd) { return is_temporary_fs(&s); } +int fd_is_network_ns(int fd) { + int r; + + r = fd_is_fs_type(fd, NSFS_MAGIC); + if (r <= 0) + return r; + r = ioctl(fd, NS_GET_NSTYPE); + if (r < 0) + return -errno; + return r == CLONE_NEWNET; +} + int path_is_temporary_fs(const char *path) { _cleanup_close_ int fd = -1; diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h index 8b8d1281211..d8d3c204960 100644 --- a/src/basic/stat-util.h +++ b/src/basic/stat-util.h @@ -62,6 +62,7 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value); bool is_temporary_fs(const struct statfs *s) _pure_; int fd_is_temporary_fs(int fd); +int fd_is_network_ns(int fd); int path_is_temporary_fs(const char *path); /* Because statfs.t_type can be int on some architectures, we have to cast diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index f85856c077a..71b14e23029 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -190,6 +190,7 @@ static bool arg_network_veth = false; static char **arg_network_veth_extra = NULL; static char *arg_network_bridge = NULL; static char *arg_network_zone = NULL; +static char *arg_network_namespace_path = NULL; static unsigned long arg_personality = PERSONALITY_INVALID; static char *arg_image = NULL; static VolatileMode arg_volatile_mode = VOLATILE_NO; @@ -260,6 +261,9 @@ static void help(void) { " and attach it to an existing bridge on the host\n" " --network-zone=NAME Similar, but attach the new interface to an\n" " an automatically managed bridge interface\n" + " --network-namespace-path=PATH\n" + " Set network namespace to the one represented by\n" + " the specified kernel namespace file node\n" " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n" " Expose a container IP port on the host\n" " -Z --selinux-context=SECLABEL\n" @@ -434,6 +438,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_NETWORK_BRIDGE, ARG_NETWORK_ZONE, ARG_NETWORK_VETH_EXTRA, + ARG_NETWORK_NAMESPACE_PATH, ARG_PERSONALITY, ARG_VOLATILE, ARG_TEMPLATE, @@ -450,55 +455,56 @@ static int parse_argv(int argc, char *argv[]) { }; static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "directory", required_argument, NULL, 'D' }, - { "template", required_argument, NULL, ARG_TEMPLATE }, - { "ephemeral", no_argument, NULL, 'x' }, - { "user", required_argument, NULL, 'u' }, - { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, - { "as-pid2", no_argument, NULL, 'a' }, - { "boot", no_argument, NULL, 'b' }, - { "uuid", required_argument, NULL, ARG_UUID }, - { "read-only", no_argument, NULL, ARG_READ_ONLY }, - { "capability", required_argument, NULL, ARG_CAPABILITY }, - { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY }, - { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL }, - { "bind", required_argument, NULL, ARG_BIND }, - { "bind-ro", required_argument, NULL, ARG_BIND_RO }, - { "tmpfs", required_argument, NULL, ARG_TMPFS }, - { "overlay", required_argument, NULL, ARG_OVERLAY }, - { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO }, - { "machine", required_argument, NULL, 'M' }, - { "slice", required_argument, NULL, 'S' }, - { "setenv", required_argument, NULL, 'E' }, - { "selinux-context", required_argument, NULL, 'Z' }, - { "selinux-apifs-context", required_argument, NULL, 'L' }, - { "quiet", no_argument, NULL, 'q' }, - { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */ - { "register", required_argument, NULL, ARG_REGISTER }, - { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT }, - { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE }, - { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN }, - { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN }, - { "network-veth", no_argument, NULL, 'n' }, - { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA }, - { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE }, - { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE }, - { "personality", required_argument, NULL, ARG_PERSONALITY }, - { "image", required_argument, NULL, 'i' }, - { "volatile", optional_argument, NULL, ARG_VOLATILE }, - { "port", required_argument, NULL, 'p' }, - { "property", required_argument, NULL, ARG_PROPERTY }, - { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS }, - { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN }, - { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, - { "settings", required_argument, NULL, ARG_SETTINGS }, - { "chdir", required_argument, NULL, ARG_CHDIR }, - { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT }, - { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY }, - { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, - { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "directory", required_argument, NULL, 'D' }, + { "template", required_argument, NULL, ARG_TEMPLATE }, + { "ephemeral", no_argument, NULL, 'x' }, + { "user", required_argument, NULL, 'u' }, + { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, + { "as-pid2", no_argument, NULL, 'a' }, + { "boot", no_argument, NULL, 'b' }, + { "uuid", required_argument, NULL, ARG_UUID }, + { "read-only", no_argument, NULL, ARG_READ_ONLY }, + { "capability", required_argument, NULL, ARG_CAPABILITY }, + { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY }, + { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL }, + { "bind", required_argument, NULL, ARG_BIND }, + { "bind-ro", required_argument, NULL, ARG_BIND_RO }, + { "tmpfs", required_argument, NULL, ARG_TMPFS }, + { "overlay", required_argument, NULL, ARG_OVERLAY }, + { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO }, + { "machine", required_argument, NULL, 'M' }, + { "slice", required_argument, NULL, 'S' }, + { "setenv", required_argument, NULL, 'E' }, + { "selinux-context", required_argument, NULL, 'Z' }, + { "selinux-apifs-context", required_argument, NULL, 'L' }, + { "quiet", no_argument, NULL, 'q' }, + { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */ + { "register", required_argument, NULL, ARG_REGISTER }, + { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT }, + { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE }, + { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN }, + { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN }, + { "network-veth", no_argument, NULL, 'n' }, + { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA }, + { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE }, + { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE }, + { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH }, + { "personality", required_argument, NULL, ARG_PERSONALITY }, + { "image", required_argument, NULL, 'i' }, + { "volatile", optional_argument, NULL, ARG_VOLATILE }, + { "port", required_argument, NULL, 'p' }, + { "property", required_argument, NULL, ARG_PROPERTY }, + { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS }, + { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN }, + { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, + { "settings", required_argument, NULL, ARG_SETTINGS }, + { "chdir", required_argument, NULL, ARG_CHDIR }, + { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT }, + { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY }, + { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, + { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER }, {} }; @@ -644,6 +650,13 @@ static int parse_argv(int argc, char *argv[]) { arg_settings_mask |= SETTING_NETWORK; break; + case ARG_NETWORK_NAMESPACE_PATH: + r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path); + if (r < 0) + return r; + + break; + case 'b': if (arg_start_mode == START_PID2) { log_error("--boot and --as-pid2 may not be combined."); @@ -1103,6 +1116,17 @@ static int parse_argv(int argc, char *argv[]) { assert_not_reached("Unhandled option"); } + /* If --network-namespace-path is given with any other network-related option, + * we need to error out, to avoid conflicts between different network options. */ + if (arg_network_namespace_path && + (arg_network_interfaces || arg_network_macvlan || + arg_network_ipvlan || arg_network_veth_extra || + arg_network_bridge || arg_network_zone || + arg_network_veth || arg_private_network)) { + log_error("--network-namespace-path cannot be combined with other network options."); + return -EINVAL; + } + parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC); parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID); parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS); @@ -2532,12 +2556,14 @@ static int outer_child( int rtnl_socket, int uid_shift_socket, int unified_cgroup_hierarchy_socket, - FDSet *fds) { + FDSet *fds, + int netns_fd) { pid_t pid; ssize_t l; int r; _cleanup_close_ int fd = -1; + bool create_netns; assert(barrier); assert(directory); @@ -2788,9 +2814,11 @@ static int outer_child( if (fd < 0) return fd; + create_netns = !arg_network_namespace_path && arg_private_network; + pid = raw_clone(SIGCHLD|CLONE_NEWNS| arg_clone_ns_flags | - (arg_private_network ? CLONE_NEWNET : 0) | + (create_netns ? CLONE_NEWNET : 0) | (arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0)); if (pid < 0) return log_error_errno(errno, "Failed to fork inner child: %m"); @@ -2804,6 +2832,12 @@ static int outer_child( * requested, so that we all are owned by the user if * user namespaces are turned on. */ + if (arg_network_namespace_path) { + r = namespace_enter(-1, -1, netns_fd, -1, -1); + if (r < 0) + return r; + } + r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds); if (r < 0) _exit(EXIT_FAILURE); @@ -2836,6 +2870,7 @@ static int outer_child( notify_socket = safe_close(notify_socket); kmsg_socket = safe_close(kmsg_socket); rtnl_socket = safe_close(rtnl_socket); + netns_fd = safe_close(netns_fd); return 0; } @@ -3311,6 +3346,7 @@ static int run(int master, int ifi = 0, r; ssize_t l; sigset_t mask_chld; + _cleanup_close_ int netns_fd = -1; assert_se(sigemptyset(&mask_chld) == 0); assert_se(sigaddset(&mask_chld, SIGCHLD) == 0); @@ -3365,6 +3401,20 @@ static int run(int master, if (r < 0) return log_error_errno(errno, "Failed to install SIGCHLD handler: %m"); + if (arg_network_namespace_path) { + netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (netns_fd < 0) + return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path); + + r = fd_is_network_ns(netns_fd); + if (r < 0 && r != -ENOTTY) + return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path); + if (r == 0) { + log_error("Path %s doesn't refer to a network namespace", arg_network_namespace_path); + return -EINVAL; + } + } + *pid = raw_clone(SIGCHLD|CLONE_NEWNS); if (*pid < 0) return log_error_errno(errno, "clone() failed%s: %m", @@ -3401,7 +3451,8 @@ static int run(int master, rtnl_socket_pair[1], uid_shift_socket_pair[1], unified_cgroup_hierarchy_socket_pair[1], - fds); + fds, + netns_fd); if (r < 0) _exit(EXIT_FAILURE); diff --git a/test/TEST-13-NSPAWN-SMOKE/test.sh b/test/TEST-13-NSPAWN-SMOKE/test.sh index 7f7380fd626..239c7e0731e 100755 --- a/test/TEST-13-NSPAWN-SMOKE/test.sh +++ b/test/TEST-13-NSPAWN-SMOKE/test.sh @@ -18,12 +18,12 @@ test_setup() { eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) setup_basic_environment - dracut_install busybox chmod rmdir unshare + dracut_install busybox chmod rmdir unshare ip cp create-busybox-container $initdir/ ./create-busybox-container $initdir/nc-container - initdir="$initdir/nc-container" dracut_install nc + initdir="$initdir/nc-container" dracut_install nc ip # setup the testsuite service cat >$initdir/etc/systemd/system/testsuite.service <$initdir/test-nspawn.sh <<'EOF' @@ -107,6 +109,52 @@ function run { [[ "$is_user_ns_supported" = "no" && "$3" = "yes" ]] && return 1 fi + local _netns_opt="--network-namespace-path=/proc/self/ns/net" + + # --network-namespace-path and network-related options cannot be used together + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-interface=lo -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-macvlan=lo -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-ipvlan=lo -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-veth -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-veth-extra=lo -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-bridge=lo -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --network-zone=zone -b; then + return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" --private-network -b; then + return 1 + fi + + # test --network-namespace-path works with a network namespace created by "ip netns" + ip netns add nspawn_test + _netns_opt="--network-namespace-path=/run/netns/nspawn_test" + UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" "$_netns_opt" ip a | grep -E '^1: lo.*DOWN' + local r=$? + ip netns del nspawn_test + + if [ $r -ne 0 ]; then + return 1 + fi + return 0 }