mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-30 23:54:04 +08:00
2ddd3cac1f
atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nsproxy.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in refcount.h have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the nsproxy.count it might make a difference in following places: - put_nsproxy() and switch_task_namespaces(): decrement in refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE ordering on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook <keescook@chromium.org> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com> Reviewed-by: David Windsor <dwindsor@gmail.com> Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com> Reviewed-by: Christian Brauner <brauner@kernel.org> Link: https://lore.kernel.org/r/20230818041327.gonna.210-kees@kernel.org Signed-off-by: Kees Cook <keescook@chromium.org>
115 lines
3.1 KiB
C
115 lines
3.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_NSPROXY_H
|
|
#define _LINUX_NSPROXY_H
|
|
|
|
#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
|
|
|
|
struct mnt_namespace;
|
|
struct uts_namespace;
|
|
struct ipc_namespace;
|
|
struct pid_namespace;
|
|
struct cgroup_namespace;
|
|
struct fs_struct;
|
|
|
|
/*
|
|
* A structure to contain pointers to all per-process
|
|
* namespaces - fs (mount), uts, network, sysvipc, etc.
|
|
*
|
|
* The pid namespace is an exception -- it's accessed using
|
|
* task_active_pid_ns. The pid namespace here is the
|
|
* namespace that children will use.
|
|
*
|
|
* 'count' is the number of tasks holding a reference.
|
|
* The count for each namespace, then, will be the number
|
|
* of nsproxies pointing to it, not the number of tasks.
|
|
*
|
|
* The nsproxy is shared by tasks which share all namespaces.
|
|
* As soon as a single namespace is cloned or unshared, the
|
|
* nsproxy is copied.
|
|
*/
|
|
struct nsproxy {
|
|
refcount_t count;
|
|
struct uts_namespace *uts_ns;
|
|
struct ipc_namespace *ipc_ns;
|
|
struct mnt_namespace *mnt_ns;
|
|
struct pid_namespace *pid_ns_for_children;
|
|
struct net *net_ns;
|
|
struct time_namespace *time_ns;
|
|
struct time_namespace *time_ns_for_children;
|
|
struct cgroup_namespace *cgroup_ns;
|
|
};
|
|
/*
 * Declaration only; the object is defined outside this header.
 * NOTE(review): presumably the namespace set of the initial task -- confirm
 * at the definition.
 */
extern struct nsproxy init_nsproxy;
|
|
|
|
/*
 * A structure to encompass all bits needed to install
 * a partial or complete new set of namespaces.
 *
 * If a new user namespace is requested cred will
 * point to a modifiable set of credentials. If a pointer
 * to a modifiable set is needed nsset_cred() must be
 * used and tested.
 */
struct nsset {
	/* Request flags; nsset_cred() tests CLONE_NEWUSER in here. */
	unsigned int flags;
	/* Namespace set being assembled for installation. */
	struct nsproxy *nsproxy;
	struct fs_struct *fs;
	/* Only modifiable when a new user namespace was requested. */
	const struct cred *cred;
};
|
|
|
|
static inline struct cred *nsset_cred(struct nsset *set)
|
|
{
|
|
if (set->flags & CLONE_NEWUSER)
|
|
return (struct cred *)set->cred;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * The namespace access rules are:
 *
 *  1. only the current task is allowed to change the tsk->nsproxy pointer
 *     or any pointer on the nsproxy itself.  Current must hold the
 *     task_lock when changing tsk->nsproxy.
 *
 *  2. when accessing (i.e. reading) the current task's namespaces, no
 *     precautions need to be taken - just dereference the pointers.
 *
 *  3. access to another task's namespaces is performed like this:
 *     task_lock(task);
 *     nsproxy = task->nsproxy;
 *     if (nsproxy != NULL) {
 *             / *
 *               * work with the namespaces here
 *               * e.g. get the reference on one of them
 *               * /
 *     } / *
 *         * NULL task->nsproxy means that this task is
 *         * almost dead (zombie)
 *         * /
 *     task_unlock(task);
 *
 */
|
|
|
|
int copy_namespaces(unsigned long flags, struct task_struct *tsk);
|
|
void exit_task_namespaces(struct task_struct *tsk);
|
|
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
|
|
int exec_task_namespaces(void);
|
|
void free_nsproxy(struct nsproxy *ns);
|
|
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
|
|
struct cred *, struct fs_struct *);
|
|
int __init nsproxy_cache_init(void);
|
|
|
|
static inline void put_nsproxy(struct nsproxy *ns)
|
|
{
|
|
if (refcount_dec_and_test(&ns->count))
|
|
free_nsproxy(ns);
|
|
}
|
|
|
|
static inline void get_nsproxy(struct nsproxy *ns)
|
|
{
|
|
refcount_inc(&ns->count);
|
|
}
|
|
|
|
#endif
|