linux/include/linux/user_namespace.h
Eric W. Biederman 15bc01effe ucounts: Fix signal ucount refcounting
In commit fda31c5029 ("signal: avoid double atomic counter
increments for user accounting") Linus made a clever optimization to
how rlimits and the struct user_struct.  Unfortunately that
optimization does not work in the obvious way when moved to nested
rlimits.  The problem is that the last decrement of the per user
namespace per user sigpending counter might also be the last decrement
of the sigpending counter in the parent user namespace as well.  Which
means that simply freeing the leaf ucount in __free_sigqueue is not
enough.

Maintain the optimization and handle the tricky cases by introducing
inc_rlimit_get_ucounts and dec_rlimit_put_ucounts.

By moving the entire optimization into functions that perform all of
the work it becomes possible to ensure that every level is handled
properly.

The new function inc_rlimit_get_ucounts returns 0 on failure to
increment the ucount.  This is different than inc_rlimit_ucounts which
increments the ucounts and returns LONG_MAX if the ucount counter has
exceeded it's maximum or it wrapped (to indicate the counter needs to
decremented).

I wish we had a single user to account all pending signals to across
all of the threads of a process so this complexity was not necessary

Cc: stable@vger.kernel.org
Fixes: d646969055 ("Reimplement RLIMIT_SIGPENDING on top of ucounts")
v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133
Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133
Reviewed-by: Alexey Gladkov <legion@kernel.org>
Tested-by: Rune Kleveland <rune.kleveland@infomedia.dk>
Tested-by: Yu Zhao <yuzhao@google.com>
Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2021-10-18 16:02:30 -05:00

220 lines
6.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_USER_NAMESPACE_H
#define _LINUX_USER_NAMESPACE_H
#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/rwsem.h>
#include <linux/sysctl.h>
#include <linux/err.h>
#define UID_GID_MAP_MAX_BASE_EXTENTS 5
#define UID_GID_MAP_MAX_EXTENTS 340
struct uid_gid_extent {
u32 first;
u32 lower_first;
u32 count;
};
struct uid_gid_map { /* 64 bytes -- 1 cache line */
u32 nr_extents;
union {
struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
struct {
struct uid_gid_extent *forward;
struct uid_gid_extent *reverse;
};
};
};
#define USERNS_SETGROUPS_ALLOWED 1UL
#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
struct ucounts;
enum ucount_type {
UCOUNT_USER_NAMESPACES,
UCOUNT_PID_NAMESPACES,
UCOUNT_UTS_NAMESPACES,
UCOUNT_IPC_NAMESPACES,
UCOUNT_NET_NAMESPACES,
UCOUNT_MNT_NAMESPACES,
UCOUNT_CGROUP_NAMESPACES,
UCOUNT_TIME_NAMESPACES,
#ifdef CONFIG_INOTIFY_USER
UCOUNT_INOTIFY_INSTANCES,
UCOUNT_INOTIFY_WATCHES,
#endif
#ifdef CONFIG_FANOTIFY
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif
UCOUNT_RLIMIT_NPROC,
UCOUNT_RLIMIT_MSGQUEUE,
UCOUNT_RLIMIT_SIGPENDING,
UCOUNT_RLIMIT_MEMLOCK,
UCOUNT_COUNTS,
};
#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
struct uid_gid_map projid_map;
struct user_namespace *parent;
int level;
kuid_t owner;
kgid_t group;
struct ns_common ns;
unsigned long flags;
/* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP
* in its effective capability set at the child ns creation time. */
bool parent_could_setfcap;
#ifdef CONFIG_KEYS
/* List of joinable keyrings in this namespace. Modification access of
* these pointers is controlled by keyring_sem. Once
* user_keyring_register is set, it won't be changed, so it can be
* accessed directly with READ_ONCE().
*/
struct list_head keyring_name_list;
struct key *user_keyring_register;
struct rw_semaphore keyring_sem;
#endif
/* Register of per-UID persistent keyrings for this namespace */
#ifdef CONFIG_PERSISTENT_KEYRINGS
struct key *persistent_keyring_register;
#endif
struct work_struct work;
#ifdef CONFIG_SYSCTL
struct ctl_table_set set;
struct ctl_table_header *sysctls;
#endif
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
} __randomize_layout;
struct ucounts {
struct hlist_node node;
struct user_namespace *ns;
kuid_t uid;
atomic_t count;
atomic_long_t ucount[UCOUNT_COUNTS];
};
extern struct user_namespace init_user_ns;
extern struct ucounts init_ucounts;
bool setup_userns_sysctls(struct user_namespace *ns);
void retire_userns_sysctls(struct user_namespace *ns);
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
void put_ucounts(struct ucounts *ucounts);
static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
{
return atomic_long_read(&ucounts->ucount[type]);
}
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
enum ucount_type type, unsigned long max)
{
ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
}
#ifdef CONFIG_USER_NS
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
refcount_inc(&ns->ns.count);
return ns;
}
extern int create_user_ns(struct cred *new);
extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
if (ns && refcount_dec_and_test(&ns->ns.count))
__put_user_ns(ns);
}
struct seq_operations;
extern const struct seq_operations proc_uid_seq_operations;
extern const struct seq_operations proc_gid_seq_operations;
extern const struct seq_operations proc_projid_seq_operations;
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
extern bool in_userns(const struct user_namespace *ancestor,
const struct user_namespace *child);
extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
return &init_user_ns;
}
static inline int create_user_ns(struct cred *new)
{
return -EINVAL;
}
static inline int unshare_userns(unsigned long unshare_flags,
struct cred **new_cred)
{
if (unshare_flags & CLONE_NEWUSER)
return -EINVAL;
return 0;
}
static inline void put_user_ns(struct user_namespace *ns)
{
}
static inline bool userns_may_setgroups(const struct user_namespace *ns)
{
return true;
}
static inline bool in_userns(const struct user_namespace *ancestor,
const struct user_namespace *child)
{
return true;
}
static inline bool current_in_userns(const struct user_namespace *target_ns)
{
return true;
}
static inline struct ns_common *ns_get_owner(struct ns_common *ns)
{
return ERR_PTR(-EPERM);
}
#endif
#endif /* _LINUX_USER_H */