2019-06-01 16:08:55 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-07-31 02:58:49 +08:00
|
|
|
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/sysctl.h>
|
|
|
|
#include <linux/slab.h>
|
2017-02-03 00:54:15 +08:00
|
|
|
#include <linux/cred.h>
|
2016-08-09 02:54:50 +08:00
|
|
|
#include <linux/hash.h>
|
2018-04-06 07:25:34 +08:00
|
|
|
#include <linux/kmemleak.h>
|
2016-07-31 02:58:49 +08:00
|
|
|
#include <linux/user_namespace.h>
|
|
|
|
|
2021-04-22 20:27:09 +08:00
|
|
|
struct ucounts init_ucounts = {
|
|
|
|
.ns = &init_user_ns,
|
|
|
|
.uid = GLOBAL_ROOT_UID,
|
2021-04-22 20:27:10 +08:00
|
|
|
.count = ATOMIC_INIT(1),
|
2021-04-22 20:27:09 +08:00
|
|
|
};
|
|
|
|
|
2016-08-09 02:54:50 +08:00
|
|
|
#define UCOUNTS_HASHTABLE_BITS 10
|
|
|
|
static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
|
|
|
|
static DEFINE_SPINLOCK(ucounts_lock);
|
|
|
|
|
|
|
|
#define ucounts_hashfn(ns, uid) \
|
|
|
|
hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
|
|
|
|
UCOUNTS_HASHTABLE_BITS)
|
|
|
|
#define ucounts_hashentry(ns, uid) \
|
|
|
|
(ucounts_hashtable + ucounts_hashfn(ns, uid))
|
|
|
|
|
|
|
|
|
2016-07-31 02:58:49 +08:00
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
static struct ctl_table_set *
|
|
|
|
set_lookup(struct ctl_table_root *root)
|
|
|
|
{
|
|
|
|
return ¤t_user_ns()->set;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int set_is_seen(struct ctl_table_set *set)
|
|
|
|
{
|
|
|
|
return ¤t_user_ns()->set == set;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int set_permissions(struct ctl_table_header *head,
|
|
|
|
struct ctl_table *table)
|
|
|
|
{
|
|
|
|
struct user_namespace *user_ns =
|
|
|
|
container_of(head->set, struct user_namespace, set);
|
|
|
|
int mode;
|
|
|
|
|
|
|
|
/* Allow users with CAP_SYS_RESOURCE unrestrained access */
|
|
|
|
if (ns_capable(user_ns, CAP_SYS_RESOURCE))
|
|
|
|
mode = (table->mode & S_IRWXU) >> 6;
|
|
|
|
else
|
|
|
|
/* Allow all others at most read-only access */
|
|
|
|
mode = table->mode & S_IROTH;
|
|
|
|
return (mode << 6) | (mode << 3) | mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ctl_table_root set_root = {
|
|
|
|
.lookup = set_lookup,
|
|
|
|
.permissions = set_permissions,
|
|
|
|
};
|
|
|
|
|
2016-12-14 21:56:33 +08:00
|
|
|
/*
 * Template for one user_table entry: an int-sized, mode 0644 value handled
 * by proc_dointvec_minmax and bounded to [SYSCTL_ZERO, SYSCTL_INT_MAX].
 * The .data pointer is deliberately left unset here; it is filled in per
 * namespace by setup_userns_sysctls().
 */
#define UCOUNT_ENTRY(name)					\
	{							\
		.procname	= name,				\
		.mode		= 0644,				\
		.maxlen		= sizeof(int),			\
		.proc_handler	= proc_dointvec_minmax,		\
		.extra2		= SYSCTL_INT_MAX,		\
		.extra1		= SYSCTL_ZERO,			\
	}
|
2016-08-09 02:54:50 +08:00
|
|
|
/*
 * Template sysctl table, duplicated for every user namespace.
 *
 * Entry order matters: setup_userns_sysctls() points entry i at
 * ns->ucount_max[i], and it asserts at build time that this array has
 * exactly UCOUNT_COUNTS + 1 elements.
 */
static struct ctl_table user_table[] = {
	/* Namespace counts */
	UCOUNT_ENTRY("max_user_namespaces"),
	UCOUNT_ENTRY("max_pid_namespaces"),
	UCOUNT_ENTRY("max_uts_namespaces"),
	UCOUNT_ENTRY("max_ipc_namespaces"),
	UCOUNT_ENTRY("max_net_namespaces"),
	UCOUNT_ENTRY("max_mnt_namespaces"),
	UCOUNT_ENTRY("max_cgroup_namespaces"),
	UCOUNT_ENTRY("max_time_namespaces"),
#ifdef CONFIG_INOTIFY_USER
	/* inotify counts */
	UCOUNT_ENTRY("max_inotify_instances"),
	UCOUNT_ENTRY("max_inotify_watches"),
#endif
	/*
	 * NOTE(review): presumably a placeholder slot for a counter type
	 * that has no sysctl file of its own - confirm against
	 * enum ucount_type.
	 */
	{ },
	/* Trailing empty entry */
	{ }
};
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
|
|
|
|
bool setup_userns_sysctls(struct user_namespace *ns)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
struct ctl_table *tbl;
|
2020-04-07 23:46:43 +08:00
|
|
|
|
|
|
|
BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
|
2016-07-31 02:58:49 +08:00
|
|
|
setup_sysctl_set(&ns->set, &set_root, set_is_seen);
|
2016-08-09 02:54:50 +08:00
|
|
|
tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
|
2016-07-31 02:58:49 +08:00
|
|
|
if (tbl) {
|
2016-08-09 03:41:52 +08:00
|
|
|
int i;
|
|
|
|
for (i = 0; i < UCOUNT_COUNTS; i++) {
|
|
|
|
tbl[i].data = &ns->ucount_max[i];
|
|
|
|
}
|
2016-08-09 02:54:50 +08:00
|
|
|
ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
|
2016-07-31 02:58:49 +08:00
|
|
|
}
|
|
|
|
if (!ns->sysctls) {
|
|
|
|
kfree(tbl);
|
|
|
|
retire_sysctl_set(&ns->set);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * retire_userns_sysctls - undo setup_userns_sysctls()
 * @ns: user namespace whose sysctl table is torn down
 *
 * Unregisters @ns->sysctls, retires @ns->set and frees the table that was
 * registered.  Does nothing without CONFIG_SYSCTL.
 */
void retire_userns_sysctls(struct user_namespace *ns)
{
#ifdef CONFIG_SYSCTL
	/* Fetch the table pointer before the header is unregistered. */
	struct ctl_table *table = ns->sysctls->ctl_table_arg;

	unregister_sysctl_table(ns->sysctls);
	retire_sysctl_set(&ns->set);
	kfree(table);
#endif
}
|
|
|
|
|
2016-08-09 02:54:50 +08:00
|
|
|
/*
 * Walk the hash bucket @hashent looking for the entry that belongs to
 * (@ns, @uid).  Returns the entry, or NULL when the bucket holds none.
 * No reference is taken and no lock is acquired here.
 */
static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
{
	struct ucounts *entry;

	hlist_for_each_entry(entry, hashent, node) {
		if (entry->ns != ns)
			continue;
		if (uid_eq(entry->uid, uid))
			return entry;
	}

	return NULL;
}
|
|
|
|
|
2021-04-22 20:27:09 +08:00
|
|
|
static void hlist_add_ucounts(struct ucounts *ucounts)
|
|
|
|
{
|
|
|
|
struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
|
|
|
|
spin_lock_irq(&ucounts_lock);
|
|
|
|
hlist_add_head(&ucounts->node, hashent);
|
|
|
|
spin_unlock_irq(&ucounts_lock);
|
|
|
|
}
|
|
|
|
|
2021-04-22 20:27:10 +08:00
|
|
|
struct ucounts *get_ucounts(struct ucounts *ucounts)
|
|
|
|
{
|
|
|
|
if (ucounts && atomic_add_negative(1, &ucounts->count)) {
|
|
|
|
put_ucounts(ucounts);
|
|
|
|
ucounts = NULL;
|
|
|
|
}
|
|
|
|
return ucounts;
|
|
|
|
}
|
|
|
|
|
2021-04-22 20:27:09 +08:00
|
|
|
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
|
2016-08-09 02:54:50 +08:00
|
|
|
{
|
|
|
|
struct hlist_head *hashent = ucounts_hashentry(ns, uid);
|
|
|
|
struct ucounts *ucounts, *new;
|
|
|
|
|
2017-01-20 21:21:35 +08:00
|
|
|
spin_lock_irq(&ucounts_lock);
|
2016-08-09 02:54:50 +08:00
|
|
|
ucounts = find_ucounts(ns, uid, hashent);
|
|
|
|
if (!ucounts) {
|
2017-01-20 21:21:35 +08:00
|
|
|
spin_unlock_irq(&ucounts_lock);
|
2016-08-09 02:54:50 +08:00
|
|
|
|
|
|
|
new = kzalloc(sizeof(*new), GFP_KERNEL);
|
|
|
|
if (!new)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
new->ns = ns;
|
|
|
|
new->uid = uid;
|
2021-04-22 20:27:10 +08:00
|
|
|
atomic_set(&new->count, 1);
|
2016-08-09 02:54:50 +08:00
|
|
|
|
2017-01-20 21:21:35 +08:00
|
|
|
spin_lock_irq(&ucounts_lock);
|
2016-08-09 02:54:50 +08:00
|
|
|
ucounts = find_ucounts(ns, uid, hashent);
|
|
|
|
if (ucounts) {
|
|
|
|
kfree(new);
|
|
|
|
} else {
|
|
|
|
hlist_add_head(&new->node, hashent);
|
2021-04-22 20:27:10 +08:00
|
|
|
spin_unlock_irq(&ucounts_lock);
|
|
|
|
return new;
|
2016-08-09 02:54:50 +08:00
|
|
|
}
|
|
|
|
}
|
2017-01-20 21:21:35 +08:00
|
|
|
spin_unlock_irq(&ucounts_lock);
|
2021-04-22 20:27:10 +08:00
|
|
|
ucounts = get_ucounts(ucounts);
|
2021-04-22 20:27:09 +08:00
|
|
|
return ucounts;
|
|
|
|
}
|
|
|
|
|
|
|
|
void put_ucounts(struct ucounts *ucounts)
|
2016-08-09 02:54:50 +08:00
|
|
|
{
|
2017-01-20 21:21:35 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
2021-04-22 20:27:10 +08:00
|
|
|
if (atomic_dec_and_test(&ucounts->count)) {
|
|
|
|
spin_lock_irqsave(&ucounts_lock, flags);
|
2016-08-09 02:54:50 +08:00
|
|
|
hlist_del_init(&ucounts->node);
|
2021-04-22 20:27:10 +08:00
|
|
|
spin_unlock_irqrestore(&ucounts_lock, flags);
|
|
|
|
kfree(ucounts);
|
|
|
|
}
|
2016-08-09 02:54:50 +08:00
|
|
|
}
|
|
|
|
|
2021-04-22 20:27:08 +08:00
|
|
|
/*
 * Atomically increment *@v, but only while its value is below @u.
 *
 * Returns true when the increment was performed, false when the value was
 * already at or above @u.  The compare-and-exchange is retried with the
 * freshly observed value whenever another update got in between.
 */
static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
{
	long cur = atomic_long_read(v);

	while (likely(cur < u)) {
		long seen = atomic_long_cmpxchg(v, cur, cur + 1);

		if (likely(seen == cur))
			return true;
		cur = seen;
	}

	return false;
}
|
|
|
|
|
2016-08-09 03:41:52 +08:00
|
|
|
/*
 * inc_ucount - charge one object of @type to (@ns, @uid)
 *
 * Obtains the ucounts entry for (@ns, @uid) and increments counter @type
 * in it and in every entry reachable through ->ns->ucounts, each one
 * bounded by its namespace's ucount_max[@type].
 *
 * Returns the entry (with the reference from alloc_ucounts()) on success.
 * If any level is at its limit, the increments already made are rolled
 * back, the reference is dropped and NULL is returned.  NULL is also
 * returned when alloc_ucounts() fails.
 */
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
			   enum ucount_type type)
{
	struct ucounts *head = alloc_ucounts(ns, uid);
	struct ucounts *cur = head;
	struct ucounts *undo;

	while (cur) {
		struct user_namespace *owner = cur->ns;
		long limit = READ_ONCE(owner->ucount_max[type]);

		if (!atomic_long_inc_below(&cur->ucount[type], limit)) {
			/* Undo every level charged before the failing one. */
			for (undo = head; undo != cur; undo = undo->ns->ucounts)
				atomic_long_dec(&undo->ucount[type]);

			put_ucounts(head);
			return NULL;
		}

		cur = owner->ucounts;
	}

	return head;
}
|
|
|
|
|
2016-08-09 03:41:52 +08:00
|
|
|
void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
|
2016-08-09 02:41:24 +08:00
|
|
|
{
|
2016-08-09 02:54:50 +08:00
|
|
|
struct ucounts *iter;
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
2021-04-22 20:27:08 +08:00
|
|
|
long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
|
2016-08-09 02:41:24 +08:00
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
}
|
2016-08-09 02:54:50 +08:00
|
|
|
put_ucounts(ucounts);
|
2016-08-09 02:41:24 +08:00
|
|
|
}
|
|
|
|
|
2021-04-22 20:27:11 +08:00
|
|
|
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
|
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
|
|
|
long ret = 0;
|
|
|
|
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
|
|
|
long max = READ_ONCE(iter->ns->ucount_max[type]);
|
|
|
|
long new = atomic_long_add_return(v, &iter->ucount[type]);
|
|
|
|
if (new < 0 || new > max)
|
|
|
|
ret = LONG_MAX;
|
|
|
|
else if (iter == ucounts)
|
|
|
|
ret = new;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
|
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
|
|
|
long new;
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
|
|
|
long dec = atomic_long_add_return(-v, &iter->ucount[type]);
|
|
|
|
WARN_ON_ONCE(dec < 0);
|
|
|
|
if (iter == ucounts)
|
|
|
|
new = dec;
|
|
|
|
}
|
|
|
|
return (new == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
|
|
|
|
{
|
|
|
|
struct ucounts *iter;
|
|
|
|
if (get_ucounts_value(ucounts, type) > max)
|
|
|
|
return true;
|
|
|
|
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
|
|
|
|
max = READ_ONCE(iter->ns->ucount_max[type]);
|
|
|
|
if (get_ucounts_value(iter, type) > max)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-07-31 02:58:49 +08:00
|
|
|
static __init int user_namespace_sysctl_init(void)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_SYSCTL
|
2016-08-09 02:54:50 +08:00
|
|
|
static struct ctl_table_header *user_header;
|
2016-07-31 02:58:49 +08:00
|
|
|
static struct ctl_table empty[1];
|
|
|
|
/*
|
2016-08-09 02:54:50 +08:00
|
|
|
* It is necessary to register the user directory in the
|
2016-07-31 02:58:49 +08:00
|
|
|
* default set so that registrations in the child sets work
|
|
|
|
* properly.
|
|
|
|
*/
|
2016-08-09 02:54:50 +08:00
|
|
|
user_header = register_sysctl("user", empty);
|
2017-02-09 06:30:50 +08:00
|
|
|
kmemleak_ignore(user_header);
|
2016-08-09 02:54:50 +08:00
|
|
|
BUG_ON(!user_header);
|
2016-07-31 02:58:49 +08:00
|
|
|
BUG_ON(!setup_userns_sysctls(&init_user_ns));
|
|
|
|
#endif
|
2021-04-22 20:27:09 +08:00
|
|
|
hlist_add_ucounts(&init_ucounts);
|
2021-04-22 20:27:11 +08:00
|
|
|
inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
2016-07-31 02:58:49 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
subsys_initcall(user_namespace_sysctl_init);
|