linux/security/selinux/selinuxfs.c

2168 lines
49 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* Updated: Karl MacMillan <kmacmillan@tresys.com>
*
* Added conditional policy language extensions
*
* Updated: Hewlett-Packard <paul@paul-moore.com>
*
* Added support for the policy capability bitmap
*
* Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2003 - 2004 Tresys Technology, LLC
* Copyright (C) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/mutex.h>
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
#include <linux/namei.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/security.h>
#include <linux/major.h>
#include <linux/seq_file.h>
#include <linux/percpu.h>
#include <linux/audit.h>
#include <linux/uaccess.h>
#include <linux/kobject.h>
#include <linux/ctype.h>
/* selinuxfs pseudo filesystem for exporting the security policy API.
Based on the proc code and the fs/nfsd/nfsctl.c code. */
#include "flask.h"
#include "avc.h"
#include "avc_ss.h"
#include "security.h"
#include "objsec.h"
#include "conditional.h"
selinux: measure state and policy capabilities SELinux stores the configuration state and the policy capabilities in kernel memory. Changes to this data at runtime would have an impact on the security guarantees provided by SELinux. Measuring this data through IMA subsystem provides a tamper-resistant way for an attestation service to remotely validate it at runtime. Measure the configuration state and policy capabilities by calling the IMA hook ima_measure_critical_data(). To enable SELinux data measurement, the following steps are required: 1, Add "ima_policy=critical_data" to the kernel command line arguments to enable measuring SELinux data at boot time. For example, BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3+ root=UUID=fd643309-a5d2-4ed3-b10d-3c579a5fab2f ro nomodeset security=selinux ima_policy=critical_data 2, Add the following rule to /etc/ima/ima-policy measure func=CRITICAL_DATA label=selinux Sample measurement of SELinux state and policy capabilities: 10 2122...65d8 ima-buf sha256:13c2...1292 selinux-state 696e...303b Execute the following command to extract the measured data from the IMA's runtime measurements list: grep "selinux-state" /sys/kernel/security/integrity/ima/ascii_runtime_measurements | tail -1 | cut -d' ' -f 6 | xxd -r -p The output should be a list of key-value pairs. For example, initialized=1;enforcing=0;checkreqprot=1;network_peer_controls=1;open_perms=1;extended_socket_class=1;always_check_network=0;cgroup_seclabel=1;nnp_nosuid_transition=1;genfs_seclabel_symlinks=0; To verify the measurement is consistent with the current SELinux state reported on the system, compare the integer values in the following files with those set in the IMA measurement (using the following commands): - cat /sys/fs/selinux/enforce - cat /sys/fs/selinux/checkreqprot - cat /sys/fs/selinux/policy_capabilities/[capability_file] Note that the actual verification would be against an expected state and done on a separate system (likely an attestation server) requiring "initialized=1;enforcing=1;checkreqprot=0;" for a secure state and then whatever policy capabilities are actually set in the expected policy (which can be extracted from the policy itself via seinfo, for example). Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com> Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com> Suggested-by: Paul Moore <paul@paul-moore.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2021-02-13 00:37:09 +08:00
#include "ima.h"
enum sel_inos {
SEL_ROOT_INO = 2,
SEL_LOAD, /* load policy */
SEL_ENFORCE, /* get or set enforcing status */
SEL_CONTEXT, /* validate context */
SEL_ACCESS, /* compute access decision */
SEL_CREATE, /* compute create labeling decision */
SEL_RELABEL, /* compute relabeling decision */
SEL_USER, /* compute reachable user contexts */
SEL_POLICYVERS, /* return policy version for this kernel */
SEL_COMMIT_BOOLS, /* commit new boolean values */
SEL_MLS, /* return if MLS policy is enabled */
SEL_DISABLE, /* disable SELinux until next reboot */
SEL_MEMBER, /* compute polyinstantiation membership decision */
SEL_CHECKREQPROT, /* check requested protection, not kernel-applied one */
SEL_COMPAT_NET, /* whether to use old compat network packet controls */
SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */
SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */
selinux: fast status update interface (/selinux/status) This patch provides a new /selinux/status entry which allows applications read-only mmap(2). This region reflects selinux_kernel_status structure in kernel space. struct selinux_kernel_status { u32 length; /* length of this structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ }; When userspace object manager caches access control decisions provided by SELinux, it needs to invalidate the cache on policy reload and setenforce to keep consistency. However, the applications need to check the kernel state for each accesses on userspace avc, or launch a background worker process. In heuristic, frequency of invalidation is much less than frequency of making access control decision, so it is annoying to invoke a system call to check we don't need to invalidate the userspace cache. If we can use a background worker thread, it allows to receive invalidation messages from the kernel. But it requires us an invasive coding toward the base application in some cases; E.g, when we provide a feature performing with SELinux as a plugin module, it is unwelcome manner to launch its own worker thread from the module. If we could map /selinux/status to process memory space, application can know updates of selinux status; policy reload or setenforce. A typical application checks selinux_kernel_status::sequence when it tries to reference userspace avc. If it was changed from the last time when it checked userspace avc, it means something was updated in the kernel space. Then, the application can reset userspace avc or update current enforcing mode, without any system call invocations. This sequence number is updated according to the seqlock logic, so we need to wait for a while if it is odd number. Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com> Acked-by: Eric Paris <eparis@redhat.com> -- security/selinux/include/security.h | 21 ++++++ security/selinux/selinuxfs.c | 56 +++++++++++++++ security/selinux/ss/Makefile | 2 +- security/selinux/ss/services.c | 3 + security/selinux/ss/status.c | 129 +++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletions(-) Signed-off-by: James Morris <jmorris@namei.org>
2010-09-14 17:28:39 +08:00
SEL_STATUS, /* export current status using mmap() */
SEL_POLICY, /* allow userspace to read the in kernel policy */
SEL_VALIDATE_TRANS, /* compute validatetrans decision */
SEL_INO_NEXT, /* The next inode number to use */
};
struct selinux_fs_info {
struct dentry *bool_dir;
unsigned int bool_num;
char **bool_pending_names;
int *bool_pending_values;
struct dentry *class_dir;
unsigned long last_class_ino;
bool policy_opened;
struct dentry *policycap_dir;
unsigned long last_ino;
struct super_block *sb;
};
static int selinux_fs_info_create(struct super_block *sb)
{
struct selinux_fs_info *fsi;
fsi = kzalloc(sizeof(*fsi), GFP_KERNEL);
if (!fsi)
return -ENOMEM;
fsi->last_ino = SEL_INO_NEXT - 1;
fsi->sb = sb;
sb->s_fs_info = fsi;
return 0;
}
static void selinux_fs_info_free(struct super_block *sb)
{
struct selinux_fs_info *fsi = sb->s_fs_info;
unsigned int i;
if (fsi) {
for (i = 0; i < fsi->bool_num; i++)
kfree(fsi->bool_pending_names[i]);
kfree(fsi->bool_pending_names);
kfree(fsi->bool_pending_values);
}
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
#define SEL_INITCON_INO_OFFSET 0x01000000
#define SEL_BOOL_INO_OFFSET 0x02000000
#define SEL_CLASS_INO_OFFSET 0x04000000
#define SEL_POLICYCAP_INO_OFFSET 0x08000000
#define SEL_INO_MASK 0x00ffffff
#define BOOL_DIR_NAME "booleans"
#define CLASS_DIR_NAME "class"
#define POLICYCAP_DIR_NAME "policy_capabilities"
#define TMPBUFLEN 12
static ssize_t sel_read_enforce(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
length = scnprintf(tmpbuf, TMPBUFLEN, "%d",
enforcing_enabled());
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
#ifdef CONFIG_SECURITY_SELINUX_DEVELOP
static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char *page = NULL;
ssize_t length;
int scan_value;
bool old_value, new_value;
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
length = -EINVAL;
if (sscanf(page, "%d", &scan_value) != 1)
goto out;
new_value = !!scan_value;
old_value = enforcing_enabled();
if (new_value != old_value) {
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__SETENFORCE,
NULL);
if (length)
goto out;
audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_STATUS,
"enforcing=%d old_enforcing=%d auid=%u ses=%u"
" enabled=1 old-enabled=1 lsm=selinux res=1",
new_value, old_value,
from_kuid(&init_user_ns, audit_get_loginuid(current)),
audit_get_sessionid(current));
enforcing_set(new_value);
if (new_value)
avc_ss_reset(0);
selnl_notify_setenforce(new_value);
selinux_status_update_setenforce(new_value);
if (!new_value)
call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL);
selinux: measure state and policy capabilities SELinux stores the configuration state and the policy capabilities in kernel memory. Changes to this data at runtime would have an impact on the security guarantees provided by SELinux. Measuring this data through IMA subsystem provides a tamper-resistant way for an attestation service to remotely validate it at runtime. Measure the configuration state and policy capabilities by calling the IMA hook ima_measure_critical_data(). To enable SELinux data measurement, the following steps are required: 1, Add "ima_policy=critical_data" to the kernel command line arguments to enable measuring SELinux data at boot time. For example, BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3+ root=UUID=fd643309-a5d2-4ed3-b10d-3c579a5fab2f ro nomodeset security=selinux ima_policy=critical_data 2, Add the following rule to /etc/ima/ima-policy measure func=CRITICAL_DATA label=selinux Sample measurement of SELinux state and policy capabilities: 10 2122...65d8 ima-buf sha256:13c2...1292 selinux-state 696e...303b Execute the following command to extract the measured data from the IMA's runtime measurements list: grep "selinux-state" /sys/kernel/security/integrity/ima/ascii_runtime_measurements | tail -1 | cut -d' ' -f 6 | xxd -r -p The output should be a list of key-value pairs. For example, initialized=1;enforcing=0;checkreqprot=1;network_peer_controls=1;open_perms=1;extended_socket_class=1;always_check_network=0;cgroup_seclabel=1;nnp_nosuid_transition=1;genfs_seclabel_symlinks=0; To verify the measurement is consistent with the current SELinux state reported on the system, compare the integer values in the following files with those set in the IMA measurement (using the following commands): - cat /sys/fs/selinux/enforce - cat /sys/fs/selinux/checkreqprot - cat /sys/fs/selinux/policy_capabilities/[capability_file] Note that the actual verification would be against an expected state and done on a separate system (likely an attestation server) requiring "initialized=1;enforcing=1;checkreqprot=0;" for a secure state and then whatever policy capabilities are actually set in the expected policy (which can be extracted from the policy itself via seinfo, for example). Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com> Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com> Suggested-by: Paul Moore <paul@paul-moore.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2021-02-13 00:37:09 +08:00
selinux_ima_measure_state();
}
length = count;
out:
kfree(page);
return length;
}
#else
#define sel_write_enforce NULL
#endif
static const struct file_operations sel_enforce_ops = {
.read = sel_read_enforce,
.write = sel_write_enforce,
.llseek = generic_file_llseek,
};
static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
ino_t ino = file_inode(filp)->i_ino;
int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ?
security_get_reject_unknown() :
!security_get_allow_unknown();
length = scnprintf(tmpbuf, TMPBUFLEN, "%d", handle_unknown);
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static const struct file_operations sel_handle_unknown_ops = {
.read = sel_read_handle_unknown,
.llseek = generic_file_llseek,
};
selinux: fast status update interface (/selinux/status) This patch provides a new /selinux/status entry which allows applications read-only mmap(2). This region reflects selinux_kernel_status structure in kernel space. struct selinux_kernel_status { u32 length; /* length of this structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ }; When userspace object manager caches access control decisions provided by SELinux, it needs to invalidate the cache on policy reload and setenforce to keep consistency. However, the applications need to check the kernel state for each accesses on userspace avc, or launch a background worker process. In heuristic, frequency of invalidation is much less than frequency of making access control decision, so it is annoying to invoke a system call to check we don't need to invalidate the userspace cache. If we can use a background worker thread, it allows to receive invalidation messages from the kernel. But it requires us an invasive coding toward the base application in some cases; E.g, when we provide a feature performing with SELinux as a plugin module, it is unwelcome manner to launch its own worker thread from the module. If we could map /selinux/status to process memory space, application can know updates of selinux status; policy reload or setenforce. A typical application checks selinux_kernel_status::sequence when it tries to reference userspace avc. If it was changed from the last time when it checked userspace avc, it means something was updated in the kernel space. Then, the application can reset userspace avc or update current enforcing mode, without any system call invocations. This sequence number is updated according to the seqlock logic, so we need to wait for a while if it is odd number. Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com> Acked-by: Eric Paris <eparis@redhat.com> -- security/selinux/include/security.h | 21 ++++++ security/selinux/selinuxfs.c | 56 +++++++++++++++ security/selinux/ss/Makefile | 2 +- security/selinux/ss/services.c | 3 + security/selinux/ss/status.c | 129 +++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletions(-) Signed-off-by: James Morris <jmorris@namei.org>
2010-09-14 17:28:39 +08:00
static int sel_open_handle_status(struct inode *inode, struct file *filp)
{
struct page *status = selinux_kernel_status_page();
selinux: fast status update interface (/selinux/status) This patch provides a new /selinux/status entry which allows applications read-only mmap(2). This region reflects selinux_kernel_status structure in kernel space. struct selinux_kernel_status { u32 length; /* length of this structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ }; When userspace object manager caches access control decisions provided by SELinux, it needs to invalidate the cache on policy reload and setenforce to keep consistency. However, the applications need to check the kernel state for each accesses on userspace avc, or launch a background worker process. In heuristic, frequency of invalidation is much less than frequency of making access control decision, so it is annoying to invoke a system call to check we don't need to invalidate the userspace cache. If we can use a background worker thread, it allows to receive invalidation messages from the kernel. But it requires us an invasive coding toward the base application in some cases; E.g, when we provide a feature performing with SELinux as a plugin module, it is unwelcome manner to launch its own worker thread from the module. If we could map /selinux/status to process memory space, application can know updates of selinux status; policy reload or setenforce. A typical application checks selinux_kernel_status::sequence when it tries to reference userspace avc. If it was changed from the last time when it checked userspace avc, it means something was updated in the kernel space. Then, the application can reset userspace avc or update current enforcing mode, without any system call invocations. This sequence number is updated according to the seqlock logic, so we need to wait for a while if it is odd number. Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com> Acked-by: Eric Paris <eparis@redhat.com> -- security/selinux/include/security.h | 21 ++++++ security/selinux/selinuxfs.c | 56 +++++++++++++++ security/selinux/ss/Makefile | 2 +- security/selinux/ss/services.c | 3 + security/selinux/ss/status.c | 129 +++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletions(-) Signed-off-by: James Morris <jmorris@namei.org>
2010-09-14 17:28:39 +08:00
if (!status)
return -ENOMEM;
filp->private_data = status;
return 0;
}
static ssize_t sel_read_handle_status(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
struct page *status = filp->private_data;
BUG_ON(!status);
return simple_read_from_buffer(buf, count, ppos,
page_address(status),
sizeof(struct selinux_kernel_status));
}
static int sel_mmap_handle_status(struct file *filp,
struct vm_area_struct *vma)
{
struct page *status = filp->private_data;
unsigned long size = vma->vm_end - vma->vm_start;
BUG_ON(!status);
/* only allows one page from the head */
if (vma->vm_pgoff > 0 || size != PAGE_SIZE)
return -EIO;
/* disallow writable mapping */
if (vma->vm_flags & VM_WRITE)
return -EPERM;
/* disallow mprotect() turns it into writable */
mm: replace vma->vm_flags direct modifications with modifier calls Replace direct modifications to vma->vm_flags with calls to modifier functions to be able to track flag changes and to keep vma locking correctness. [akpm@linux-foundation.org: fix drivers/misc/open-dice.c, per Hyeonggon Yoo] Link: https://lkml.kernel.org/r/20230126193752.297968-5-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjun Roy <arjunroy@google.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David Rientjes <rientjes@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Joel Fernandes <joelaf@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kent Overstreet <kent.overstreet@linux.dev> Cc: Laurent Dufour <ldufour@linux.ibm.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Minchan Kim <minchan@google.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Oskolkov <posk@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Punit Agrawal <punit.agrawal@bytedance.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Shakeel Butt <shakeelb@google.com> Cc: Soheil Hassas Yeganeh <soheil@google.com> Cc: Song Liu <songliubraving@fb.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-01-27 03:37:49 +08:00
vm_flags_clear(vma, VM_MAYWRITE);
selinux: fast status update interface (/selinux/status) This patch provides a new /selinux/status entry which allows applications read-only mmap(2). This region reflects selinux_kernel_status structure in kernel space. struct selinux_kernel_status { u32 length; /* length of this structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ }; When userspace object manager caches access control decisions provided by SELinux, it needs to invalidate the cache on policy reload and setenforce to keep consistency. However, the applications need to check the kernel state for each accesses on userspace avc, or launch a background worker process. In heuristic, frequency of invalidation is much less than frequency of making access control decision, so it is annoying to invoke a system call to check we don't need to invalidate the userspace cache. If we can use a background worker thread, it allows to receive invalidation messages from the kernel. But it requires us an invasive coding toward the base application in some cases; E.g, when we provide a feature performing with SELinux as a plugin module, it is unwelcome manner to launch its own worker thread from the module. If we could map /selinux/status to process memory space, application can know updates of selinux status; policy reload or setenforce. A typical application checks selinux_kernel_status::sequence when it tries to reference userspace avc. If it was changed from the last time when it checked userspace avc, it means something was updated in the kernel space. Then, the application can reset userspace avc or update current enforcing mode, without any system call invocations. This sequence number is updated according to the seqlock logic, so we need to wait for a while if it is odd number. Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com> Acked-by: Eric Paris <eparis@redhat.com> -- security/selinux/include/security.h | 21 ++++++ security/selinux/selinuxfs.c | 56 +++++++++++++++ security/selinux/ss/Makefile | 2 +- security/selinux/ss/services.c | 3 + security/selinux/ss/status.c | 129 +++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletions(-) Signed-off-by: James Morris <jmorris@namei.org>
2010-09-14 17:28:39 +08:00
return remap_pfn_range(vma, vma->vm_start,
page_to_pfn(status),
size, vma->vm_page_prot);
}
static const struct file_operations sel_handle_status_ops = {
.open = sel_open_handle_status,
.read = sel_read_handle_status,
.mmap = sel_mmap_handle_status,
.llseek = generic_file_llseek,
};
static ssize_t sel_write_disable(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char *page;
ssize_t length;
int new_value;
selinux: deprecate disabling SELinux and runtime Deprecate the CONFIG_SECURITY_SELINUX_DISABLE functionality. The code was originally developed to make it easier for Linux distributions to support architectures where adding parameters to the kernel command line was difficult. Unfortunately, supporting runtime disable meant we had to make some security trade-offs when it came to the LSM hooks, as documented in the Kconfig help text: NOTE: selecting this option will disable the '__ro_after_init' kernel hardening feature for security hooks. Please consider using the selinux=0 boot parameter instead of enabling this option. Fortunately it looks as if that the original motivation for the runtime disable functionality is gone, and Fedora/RHEL appears to be the only major distribution enabling this capability at build time so we are now taking steps to remove it entirely from the kernel. The first step is to mark the functionality as deprecated and print an error when it is used (what this patch is doing). As Fedora/RHEL makes progress in transitioning the distribution away from runtime disable, we will introduce follow-up patches over several kernel releases which will block for increasing periods of time when the runtime disable is used. Finally we will remove the option entirely once we believe all users have moved to the kernel cmdline approach. Acked-by: Casey Schaufler <casey@schaufler-ca.com> Acked-by: Ondrej Mosnacek <omosnace@redhat.com> Acked-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-12-19 10:45:08 +08:00
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
selinux: remove the runtime disable functionality After working with the larger SELinux-based distros for several years, we're finally at a place where we can disable the SELinux runtime disable functionality. The existing kernel deprecation notice explains the functionality and why we want to remove it: The selinuxfs "disable" node allows SELinux to be disabled at runtime prior to a policy being loaded into the kernel. If disabled via this mechanism, SELinux will remain disabled until the system is rebooted. The preferred method of disabling SELinux is via the "selinux=0" boot parameter, but the selinuxfs "disable" node was created to make it easier for systems with primitive bootloaders that did not allow for easy modification of the kernel command line. Unfortunately, allowing for SELinux to be disabled at runtime makes it difficult to secure the kernel's LSM hooks using the "__ro_after_init" feature. It is that last sentence, mentioning the '__ro_after_init' hardening, which is the real motivation for this change, and if you look at the diffstat you'll see that the impact of this patch reaches across all the different LSMs, helping prevent tampering at the LSM hook level. From a SELinux perspective, it is important to note that if you continue to disable SELinux via "/etc/selinux/config" it may appear that SELinux is disabled, but it is simply in an uninitialized state. If you load a policy with `load_policy -i`, you will see SELinux come alive just as if you had loaded the policy during early-boot. It is also worth noting that the "/sys/fs/selinux/disable" file is always writable now, regardless of the Kconfig settings, but writing to the file has no effect on the system, other than to display an error on the console if a non-zero/true value is written. Finally, in the several years where we have been working on deprecating this functionality, there has only been one instance of someone mentioning any user visible breakage. In this particular case it was an individual's kernel test system, and the workaround documented in the deprecation notice ("selinux=0" on the kernel command line) resolved the issue without problem. Acked-by: Casey Schaufler <casey@schaufler-ca.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-18 00:43:07 +08:00
if (sscanf(page, "%d", &new_value) != 1) {
length = -EINVAL;
goto out;
selinux: remove the runtime disable functionality After working with the larger SELinux-based distros for several years, we're finally at a place where we can disable the SELinux runtime disable functionality. The existing kernel deprecation notice explains the functionality and why we want to remove it: The selinuxfs "disable" node allows SELinux to be disabled at runtime prior to a policy being loaded into the kernel. If disabled via this mechanism, SELinux will remain disabled until the system is rebooted. The preferred method of disabling SELinux is via the "selinux=0" boot parameter, but the selinuxfs "disable" node was created to make it easier for systems with primitive bootloaders that did not allow for easy modification of the kernel command line. Unfortunately, allowing for SELinux to be disabled at runtime makes it difficult to secure the kernel's LSM hooks using the "__ro_after_init" feature. It is that last sentence, mentioning the '__ro_after_init' hardening, which is the real motivation for this change, and if you look at the diffstat you'll see that the impact of this patch reaches across all the different LSMs, helping prevent tampering at the LSM hook level. From a SELinux perspective, it is important to note that if you continue to disable SELinux via "/etc/selinux/config" it may appear that SELinux is disabled, but it is simply in an uninitialized state. If you load a policy with `load_policy -i`, you will see SELinux come alive just as if you had loaded the policy during early-boot. It is also worth noting that the "/sys/fs/selinux/disable" file is always writable now, regardless of the Kconfig settings, but writing to the file has no effect on the system, other than to display an error on the console if a non-zero/true value is written. Finally, in the several years where we have been working on deprecating this functionality, there has only been one instance of someone mentioning any user visible breakage. In this particular case it was an individual's kernel test system, and the workaround documented in the deprecation notice ("selinux=0" on the kernel command line) resolved the issue without problem. Acked-by: Casey Schaufler <casey@schaufler-ca.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-18 00:43:07 +08:00
}
length = count;
if (new_value) {
selinux: remove the runtime disable functionality After working with the larger SELinux-based distros for several years, we're finally at a place where we can disable the SELinux runtime disable functionality. The existing kernel deprecation notice explains the functionality and why we want to remove it: The selinuxfs "disable" node allows SELinux to be disabled at runtime prior to a policy being loaded into the kernel. If disabled via this mechanism, SELinux will remain disabled until the system is rebooted. The preferred method of disabling SELinux is via the "selinux=0" boot parameter, but the selinuxfs "disable" node was created to make it easier for systems with primitive bootloaders that did not allow for easy modification of the kernel command line. Unfortunately, allowing for SELinux to be disabled at runtime makes it difficult to secure the kernel's LSM hooks using the "__ro_after_init" feature. It is that last sentence, mentioning the '__ro_after_init' hardening, which is the real motivation for this change, and if you look at the diffstat you'll see that the impact of this patch reaches across all the different LSMs, helping prevent tampering at the LSM hook level. From a SELinux perspective, it is important to note that if you continue to disable SELinux via "/etc/selinux/config" it may appear that SELinux is disabled, but it is simply in an uninitialized state. If you load a policy with `load_policy -i`, you will see SELinux come alive just as if you had loaded the policy during early-boot. It is also worth noting that the "/sys/fs/selinux/disable" file is always writable now, regardless of the Kconfig settings, but writing to the file has no effect on the system, other than to display an error on the console if a non-zero/true value is written. Finally, in the several years where we have been working on deprecating this functionality, there has only been one instance of someone mentioning any user visible breakage. In this particular case it was an individual's kernel test system, and the workaround documented in the deprecation notice ("selinux=0" on the kernel command line) resolved the issue without problem. Acked-by: Casey Schaufler <casey@schaufler-ca.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-18 00:43:07 +08:00
pr_err("SELinux: https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable\n");
pr_err("SELinux: Runtime disable is not supported, use selinux=0 on the kernel cmdline.\n");
}
out:
kfree(page);
return length;
}
static const struct file_operations sel_disable_ops = {
.write = sel_write_disable,
.llseek = generic_file_llseek,
};
static ssize_t sel_read_policyvers(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
length = scnprintf(tmpbuf, TMPBUFLEN, "%u", POLICYDB_VERSION_MAX);
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static const struct file_operations sel_policyvers_ops = {
.read = sel_read_policyvers,
.llseek = generic_file_llseek,
};
/* declaration for sel_write_load */
static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_dir,
unsigned int *bool_num, char ***bool_pending_names,
int **bool_pending_values);
static int sel_make_classes(struct selinux_policy *newpolicy,
struct dentry *class_dir,
unsigned long *last_class_ino);
/* declaration for sel_make_class_dirs */
static struct dentry *sel_make_dir(struct dentry *dir, const char *name,
unsigned long *ino);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
/* declaration for sel_make_policy_nodes */
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
static struct dentry *sel_make_swapover_dir(struct super_block *sb,
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
unsigned long *ino);
static ssize_t sel_read_mls(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
length = scnprintf(tmpbuf, TMPBUFLEN, "%d",
security_mls_enabled());
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static const struct file_operations sel_mls_ops = {
.read = sel_read_mls,
.llseek = generic_file_llseek,
};
struct policy_load_memory {
size_t len;
void *data;
};
static int sel_open_policy(struct inode *inode, struct file *filp)
{
struct selinux_fs_info *fsi = inode->i_sb->s_fs_info;
struct policy_load_memory *plm = NULL;
int rc;
BUG_ON(filp->private_data);
mutex_lock(&selinux_state.policy_mutex);
rc = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL);
if (rc)
goto err;
rc = -EBUSY;
if (fsi->policy_opened)
goto err;
rc = -ENOMEM;
plm = kzalloc(sizeof(*plm), GFP_KERNEL);
if (!plm)
goto err;
rc = security_read_policy(&plm->data, &plm->len);
if (rc)
goto err;
if ((size_t)i_size_read(inode) != plm->len) {
inode_lock(inode);
i_size_write(inode, plm->len);
inode_unlock(inode);
}
fsi->policy_opened = 1;
filp->private_data = plm;
mutex_unlock(&selinux_state.policy_mutex);
return 0;
err:
mutex_unlock(&selinux_state.policy_mutex);
if (plm)
vfree(plm->data);
kfree(plm);
return rc;
}
static int sel_release_policy(struct inode *inode, struct file *filp)
{
struct selinux_fs_info *fsi = inode->i_sb->s_fs_info;
struct policy_load_memory *plm = filp->private_data;
BUG_ON(!plm);
fsi->policy_opened = 0;
vfree(plm->data);
kfree(plm);
return 0;
}
static ssize_t sel_read_policy(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
struct policy_load_memory *plm = filp->private_data;
int ret;
ret = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL);
if (ret)
return ret;
return simple_read_from_buffer(buf, count, ppos, plm->data, plm->len);
}
static vm_fault_t sel_mmap_policy_fault(struct vm_fault *vmf)
{
struct policy_load_memory *plm = vmf->vma->vm_file->private_data;
unsigned long offset;
struct page *page;
if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE))
return VM_FAULT_SIGBUS;
offset = vmf->pgoff << PAGE_SHIFT;
if (offset >= roundup(plm->len, PAGE_SIZE))
return VM_FAULT_SIGBUS;
page = vmalloc_to_page(plm->data + offset);
get_page(page);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct sel_mmap_policy_ops = {
.fault = sel_mmap_policy_fault,
.page_mkwrite = sel_mmap_policy_fault,
};
static int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma)
{
if (vma->vm_flags & VM_SHARED) {
/* do not allow mprotect to make mapping writable */
mm: replace vma->vm_flags direct modifications with modifier calls Replace direct modifications to vma->vm_flags with calls to modifier functions to be able to track flag changes and to keep vma locking correctness. [akpm@linux-foundation.org: fix drivers/misc/open-dice.c, per Hyeonggon Yoo] Link: https://lkml.kernel.org/r/20230126193752.297968-5-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjun Roy <arjunroy@google.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David Rientjes <rientjes@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Joel Fernandes <joelaf@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kent Overstreet <kent.overstreet@linux.dev> Cc: Laurent Dufour <ldufour@linux.ibm.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Minchan Kim <minchan@google.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Oskolkov <posk@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Punit Agrawal <punit.agrawal@bytedance.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Shakeel Butt <shakeelb@google.com> Cc: Soheil Hassas Yeganeh <soheil@google.com> Cc: Song Liu <songliubraving@fb.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-01-27 03:37:49 +08:00
vm_flags_clear(vma, VM_MAYWRITE);
if (vma->vm_flags & VM_WRITE)
return -EACCES;
}
mm: replace vma->vm_flags direct modifications with modifier calls Replace direct modifications to vma->vm_flags with calls to modifier functions to be able to track flag changes and to keep vma locking correctness. [akpm@linux-foundation.org: fix drivers/misc/open-dice.c, per Hyeonggon Yoo] Link: https://lkml.kernel.org/r/20230126193752.297968-5-surenb@google.com Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjun Roy <arjunroy@google.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David Rientjes <rientjes@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Joel Fernandes <joelaf@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kent Overstreet <kent.overstreet@linux.dev> Cc: Laurent Dufour <ldufour@linux.ibm.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Minchan Kim <minchan@google.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Oskolkov <posk@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Punit Agrawal <punit.agrawal@bytedance.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Shakeel Butt <shakeelb@google.com> Cc: Soheil Hassas Yeganeh <soheil@google.com> Cc: Song Liu <songliubraving@fb.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-01-27 03:37:49 +08:00
vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &sel_mmap_policy_ops;
return 0;
}
static const struct file_operations sel_policy_ops = {
.open = sel_open_policy,
.read = sel_read_policy,
.mmap = sel_mmap_policy,
.release = sel_release_policy,
.llseek = generic_file_llseek,
};
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
static void sel_remove_old_bool_data(unsigned int bool_num, char **bool_names,
int *bool_values)
{
u32 i;
/* bool_dir cleanup */
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
for (i = 0; i < bool_num; i++)
kfree(bool_names[i]);
kfree(bool_names);
kfree(bool_values);
}
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
struct selinux_policy *newpolicy)
{
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
int ret = 0;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
struct dentry *tmp_parent, *tmp_bool_dir, *tmp_class_dir;
unsigned int bool_num = 0;
char **bool_names = NULL;
int *bool_values = NULL;
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
unsigned long tmp_ino = fsi->last_ino; /* Don't increment last_ino in this function */
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
tmp_parent = sel_make_swapover_dir(fsi->sb, &tmp_ino);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
if (IS_ERR(tmp_parent))
return PTR_ERR(tmp_parent);
tmp_ino = fsi->bool_dir->d_inode->i_ino - 1; /* sel_make_dir will increment and set */
tmp_bool_dir = sel_make_dir(tmp_parent, BOOL_DIR_NAME, &tmp_ino);
if (IS_ERR(tmp_bool_dir)) {
ret = PTR_ERR(tmp_bool_dir);
goto out;
}
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
tmp_ino = fsi->class_dir->d_inode->i_ino - 1; /* sel_make_dir will increment and set */
tmp_class_dir = sel_make_dir(tmp_parent, CLASS_DIR_NAME, &tmp_ino);
if (IS_ERR(tmp_class_dir)) {
ret = PTR_ERR(tmp_class_dir);
goto out;
}
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
ret = sel_make_bools(newpolicy, tmp_bool_dir, &bool_num,
&bool_names, &bool_values);
if (ret)
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
goto out;
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
ret = sel_make_classes(newpolicy, tmp_class_dir,
&fsi->last_class_ino);
if (ret)
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
goto out;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
lock_rename(tmp_parent, fsi->sb->s_root);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
/* booleans */
d_exchange(tmp_bool_dir, fsi->bool_dir);
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
swap(fsi->bool_num, bool_num);
swap(fsi->bool_pending_names, bool_names);
swap(fsi->bool_pending_values, bool_values);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
fsi->bool_dir = tmp_bool_dir;
/* classes */
d_exchange(tmp_class_dir, fsi->class_dir);
fsi->class_dir = tmp_class_dir;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
unlock_rename(tmp_parent, fsi->sb->s_root);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
out:
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
sel_remove_old_bool_data(bool_num, bool_names, bool_values);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
/* Since the other temporary dirs are children of tmp_parent
* this will handle all the cleanup in the case of a failure before
* the swapover
*/
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
simple_recursive_removal(tmp_parent, NULL);
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
return ret;
}
static ssize_t sel_write_load(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info;
selinux: fix variable scope issue in live sidtab conversion Commit 02a52c5c8c3b ("selinux: move policy commit after updating selinuxfs") moved the selinux_policy_commit() call out of security_load_policy() into sel_write_load(), which caused a subtle yet rather serious bug. The problem is that security_load_policy() passes a reference to the convert_params local variable to sidtab_convert(), which stores it in the sidtab, where it may be accessed until the policy is swapped over and RCU synchronized. Before 02a52c5c8c3b, selinux_policy_commit() was called directly from security_load_policy(), so the convert_params pointer remained valid all the way until the old sidtab was destroyed, but now that's no longer the case and calls to sidtab_context_to_sid() on the old sidtab after security_load_policy() returns may cause invalid memory accesses. This can be easily triggered using the stress test from commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance"): ``` function rand_cat() { echo $(( $RANDOM % 1024 )) } function do_work() { while true; do echo -n "system_u:system_r:kernel_t:s0:c$(rand_cat),c$(rand_cat)" \ >/sys/fs/selinux/context 2>/dev/null || true done } do_work >/dev/null & do_work >/dev/null & do_work >/dev/null & while load_policy; do echo -n .; sleep 0.1; done kill %1 kill %2 kill %3 ``` Fix this by allocating the temporary sidtab convert structures dynamically and passing them among the selinux_policy_{load,cancel,commit} functions. Fixes: 02a52c5c8c3b ("selinux: move policy commit after updating selinuxfs") Cc: stable@vger.kernel.org Tested-by: Tyler Hicks <tyhicks@linux.microsoft.com> Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com> Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com> [PM: merge fuzz in security.h and services.c] Signed-off-by: Paul Moore <paul@paul-moore.com>
2021-03-19 05:53:02 +08:00
struct selinux_load_state load_state;
ssize_t length;
void *data = NULL;
mutex_lock(&selinux_state.policy_mutex);
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL);
if (length)
goto out;
/* No partial writes. */
length = -EINVAL;
if (*ppos != 0)
goto out;
length = -ENOMEM;
data = vmalloc(count);
if (!data)
goto out;
length = -EFAULT;
if (copy_from_user(data, buf, count) != 0)
goto out;
length = security_load_policy(data, count, &load_state);
if (length) {
pr_warn_ratelimited("SELinux: failed to load policy\n");
goto out;
}
selinux: fix variable scope issue in live sidtab conversion Commit 02a52c5c8c3b ("selinux: move policy commit after updating selinuxfs") moved the selinux_policy_commit() call out of security_load_policy() into sel_write_load(), which caused a subtle yet rather serious bug. The problem is that security_load_policy() passes a reference to the convert_params local variable to sidtab_convert(), which stores it in the sidtab, where it may be accessed until the policy is swapped over and RCU synchronized. Before 02a52c5c8c3b, selinux_policy_commit() was called directly from security_load_policy(), so the convert_params pointer remained valid all the way until the old sidtab was destroyed, but now that's no longer the case and calls to sidtab_context_to_sid() on the old sidtab after security_load_policy() returns may cause invalid memory accesses. This can be easily triggered using the stress test from commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance"): ``` function rand_cat() { echo $(( $RANDOM % 1024 )) } function do_work() { while true; do echo -n "system_u:system_r:kernel_t:s0:c$(rand_cat),c$(rand_cat)" \ >/sys/fs/selinux/context 2>/dev/null || true done } do_work >/dev/null & do_work >/dev/null & do_work >/dev/null & while load_policy; do echo -n .; sleep 0.1; done kill %1 kill %2 kill %3 ``` Fix this by allocating the temporary sidtab convert structures dynamically and passing them among the selinux_policy_{load,cancel,commit} functions. Fixes: 02a52c5c8c3b ("selinux: move policy commit after updating selinuxfs") Cc: stable@vger.kernel.org Tested-by: Tyler Hicks <tyhicks@linux.microsoft.com> Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com> Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com> [PM: merge fuzz in security.h and services.c] Signed-off-by: Paul Moore <paul@paul-moore.com>
2021-03-19 05:53:02 +08:00
length = sel_make_policy_nodes(fsi, load_state.policy);
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
if (length) {
pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n");
selinux_policy_cancel(&load_state);
goto out;
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
}
selinux_policy_commit(&load_state);
length = count;
audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
"auid=%u ses=%u lsm=selinux res=1",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
audit_get_sessionid(current));
out:
mutex_unlock(&selinux_state.policy_mutex);
vfree(data);
return length;
}
static const struct file_operations sel_load_ops = {
.write = sel_write_load,
.llseek = generic_file_llseek,
};
static ssize_t sel_write_context(struct file *file, char *buf, size_t size)
{
char *canon = NULL;
u32 sid, len;
ssize_t length;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__CHECK_CONTEXT, NULL);
if (length)
goto out;
length = security_context_to_sid(buf, size, &sid, GFP_KERNEL);
if (length)
goto out;
length = security_sid_to_context(sid, &canon, &len);
if (length)
goto out;
length = -ERANGE;
if (len > SIMPLE_TRANSACTION_LIMIT) {
pr_err("SELinux: %s: context size (%u) exceeds "
"payload max\n", __func__, len);
goto out;
}
memcpy(buf, canon, len);
length = len;
out:
kfree(canon);
return length;
}
static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
checkreqprot_get());
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char *page;
ssize_t length;
unsigned int new_value;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT,
NULL);
if (length)
return length;
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
selinux: remove the 'checkreqprot' functionality We originally promised that the SELinux 'checkreqprot' functionality would be removed no sooner than June 2021, and now that it is March 2023 it seems like it is a good time to do the final removal. The deprecation notice in the kernel provides plenty of detail on why 'checkreqprot' is not desirable, with the key point repeated below: This was a compatibility mechanism for legacy userspace and for the READ_IMPLIES_EXEC personality flag. However, if set to 1, it weakens security by allowing mappings to be made executable without authorization by policy. The default value of checkreqprot at boot was changed starting in Linux v4.4 to 0 (i.e. check the actual protection), and Android and Linux distributions have been explicitly writing a "0" to /sys/fs/selinux/checkreqprot during initialization for some time. Along with the official deprecation notice, we have been discussing this on-list and directly with several of the larger SELinux-based distros and everyone is happy to see this feature finally removed. In an attempt to catch all of the smaller, and DIY, Linux systems we have been writing a deprecation notice URL into the kernel log, along with a growing ssleep() penalty, when admins enabled checkreqprot at runtime or via the kernel command line. We have yet to have anyone come to us and raise an objection to the deprecation or planned removal. It is worth noting that while this patch removes the checkreqprot functionality, it leaves the user visible interfaces (kernel command line and selinuxfs file) intact, just inert. This should help prevent breakages with existing userspace tools that correctly, but unnecessarily, disable checkreqprot at boot or runtime. Admins that attempt to enable checkreqprot will be met with a removal message in the kernel log. Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 23:43:08 +08:00
if (sscanf(page, "%u", &new_value) != 1) {
length = -EINVAL;
goto out;
selinux: remove the 'checkreqprot' functionality We originally promised that the SELinux 'checkreqprot' functionality would be removed no sooner than June 2021, and now that it is March 2023 it seems like it is a good time to do the final removal. The deprecation notice in the kernel provides plenty of detail on why 'checkreqprot' is not desirable, with the key point repeated below: This was a compatibility mechanism for legacy userspace and for the READ_IMPLIES_EXEC personality flag. However, if set to 1, it weakens security by allowing mappings to be made executable without authorization by policy. The default value of checkreqprot at boot was changed starting in Linux v4.4 to 0 (i.e. check the actual protection), and Android and Linux distributions have been explicitly writing a "0" to /sys/fs/selinux/checkreqprot during initialization for some time. Along with the official deprecation notice, we have been discussing this on-list and directly with several of the larger SELinux-based distros and everyone is happy to see this feature finally removed. In an attempt to catch all of the smaller, and DIY, Linux systems we have been writing a deprecation notice URL into the kernel log, along with a growing ssleep() penalty, when admins enabled checkreqprot at runtime or via the kernel command line. We have yet to have anyone come to us and raise an objection to the deprecation or planned removal. It is worth noting that while this patch removes the checkreqprot functionality, it leaves the user visible interfaces (kernel command line and selinuxfs file) intact, just inert. This should help prevent breakages with existing userspace tools that correctly, but unnecessarily, disable checkreqprot at boot or runtime. Admins that attempt to enable checkreqprot will be met with a removal message in the kernel log. Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 23:43:08 +08:00
}
length = count;
if (new_value) {
char comm[sizeof(current->comm)];
memcpy(comm, current->comm, sizeof(comm));
selinux: remove the 'checkreqprot' functionality We originally promised that the SELinux 'checkreqprot' functionality would be removed no sooner than June 2021, and now that it is March 2023 it seems like it is a good time to do the final removal. The deprecation notice in the kernel provides plenty of detail on why 'checkreqprot' is not desirable, with the key point repeated below: This was a compatibility mechanism for legacy userspace and for the READ_IMPLIES_EXEC personality flag. However, if set to 1, it weakens security by allowing mappings to be made executable without authorization by policy. The default value of checkreqprot at boot was changed starting in Linux v4.4 to 0 (i.e. check the actual protection), and Android and Linux distributions have been explicitly writing a "0" to /sys/fs/selinux/checkreqprot during initialization for some time. Along with the official deprecation notice, we have been discussing this on-list and directly with several of the larger SELinux-based distros and everyone is happy to see this feature finally removed. In an attempt to catch all of the smaller, and DIY, Linux systems we have been writing a deprecation notice URL into the kernel log, along with a growing ssleep() penalty, when admins enabled checkreqprot at runtime or via the kernel command line. We have yet to have anyone come to us and raise an objection to the deprecation or planned removal. It is worth noting that while this patch removes the checkreqprot functionality, it leaves the user visible interfaces (kernel command line and selinuxfs file) intact, just inert. This should help prevent breakages with existing userspace tools that correctly, but unnecessarily, disable checkreqprot at boot or runtime. Admins that attempt to enable checkreqprot will be met with a removal message in the kernel log. Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-03-16 23:43:08 +08:00
pr_err("SELinux: %s (%d) set checkreqprot to 1. This is no longer supported.\n",
comm, current->pid);
}
selinux_ima_measure_state();
selinux: measure state and policy capabilities SELinux stores the configuration state and the policy capabilities in kernel memory. Changes to this data at runtime would have an impact on the security guarantees provided by SELinux. Measuring this data through IMA subsystem provides a tamper-resistant way for an attestation service to remotely validate it at runtime. Measure the configuration state and policy capabilities by calling the IMA hook ima_measure_critical_data(). To enable SELinux data measurement, the following steps are required: 1, Add "ima_policy=critical_data" to the kernel command line arguments to enable measuring SELinux data at boot time. For example, BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3+ root=UUID=fd643309-a5d2-4ed3-b10d-3c579a5fab2f ro nomodeset security=selinux ima_policy=critical_data 2, Add the following rule to /etc/ima/ima-policy measure func=CRITICAL_DATA label=selinux Sample measurement of SELinux state and policy capabilities: 10 2122...65d8 ima-buf sha256:13c2...1292 selinux-state 696e...303b Execute the following command to extract the measured data from the IMA's runtime measurements list: grep "selinux-state" /sys/kernel/security/integrity/ima/ascii_runtime_measurements | tail -1 | cut -d' ' -f 6 | xxd -r -p The output should be a list of key-value pairs. For example, initialized=1;enforcing=0;checkreqprot=1;network_peer_controls=1;open_perms=1;extended_socket_class=1;always_check_network=0;cgroup_seclabel=1;nnp_nosuid_transition=1;genfs_seclabel_symlinks=0; To verify the measurement is consistent with the current SELinux state reported on the system, compare the integer values in the following files with those set in the IMA measurement (using the following commands): - cat /sys/fs/selinux/enforce - cat /sys/fs/selinux/checkreqprot - cat /sys/fs/selinux/policy_capabilities/[capability_file] Note that the actual verification would be against an expected state and done on a separate system (likely an attestation server) requiring "initialized=1;enforcing=1;checkreqprot=0;" for a secure state and then whatever policy capabilities are actually set in the expected policy (which can be extracted from the policy itself via seinfo, for example). Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com> Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com> Suggested-by: Paul Moore <paul@paul-moore.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2021-02-13 00:37:09 +08:00
out:
kfree(page);
return length;
}
static const struct file_operations sel_checkreqprot_ops = {
.read = sel_read_checkreqprot,
.write = sel_write_checkreqprot,
.llseek = generic_file_llseek,
};
static ssize_t sel_write_validatetrans(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
char *oldcon = NULL, *newcon = NULL, *taskcon = NULL;
char *req = NULL;
u32 osid, nsid, tsid;
u16 tclass;
int rc;
rc = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__VALIDATE_TRANS, NULL);
if (rc)
goto out;
rc = -ENOMEM;
if (count >= PAGE_SIZE)
goto out;
/* No partial writes. */
rc = -EINVAL;
if (*ppos != 0)
goto out;
req = memdup_user_nul(buf, count);
if (IS_ERR(req)) {
rc = PTR_ERR(req);
req = NULL;
goto out;
}
rc = -ENOMEM;
oldcon = kzalloc(count + 1, GFP_KERNEL);
if (!oldcon)
goto out;
newcon = kzalloc(count + 1, GFP_KERNEL);
if (!newcon)
goto out;
taskcon = kzalloc(count + 1, GFP_KERNEL);
if (!taskcon)
goto out;
rc = -EINVAL;
if (sscanf(req, "%s %s %hu %s", oldcon, newcon, &tclass, taskcon) != 4)
goto out;
rc = security_context_str_to_sid(oldcon, &osid, GFP_KERNEL);
if (rc)
goto out;
rc = security_context_str_to_sid(newcon, &nsid, GFP_KERNEL);
if (rc)
goto out;
rc = security_context_str_to_sid(taskcon, &tsid, GFP_KERNEL);
if (rc)
goto out;
rc = security_validate_transition_user(osid, nsid, tsid, tclass);
if (!rc)
rc = count;
out:
kfree(req);
kfree(oldcon);
kfree(newcon);
kfree(taskcon);
return rc;
}
static const struct file_operations sel_transition_ops = {
.write = sel_write_validatetrans,
.llseek = generic_file_llseek,
};
/*
* Remaining nodes use transaction based IO methods like nfsd/nfsctl.c
*/
static ssize_t sel_write_access(struct file *file, char *buf, size_t size);
static ssize_t sel_write_create(struct file *file, char *buf, size_t size);
static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size);
static ssize_t sel_write_user(struct file *file, char *buf, size_t size);
static ssize_t sel_write_member(struct file *file, char *buf, size_t size);
static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[SEL_ACCESS] = sel_write_access,
[SEL_CREATE] = sel_write_create,
[SEL_RELABEL] = sel_write_relabel,
[SEL_USER] = sel_write_user,
[SEL_MEMBER] = sel_write_member,
[SEL_CONTEXT] = sel_write_context,
};
static ssize_t selinux_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
{
ino_t ino = file_inode(file)->i_ino;
char *data;
ssize_t rv;
if (ino >= ARRAY_SIZE(write_op) || !write_op[ino])
return -EINVAL;
data = simple_transaction_get(file, buf, size);
if (IS_ERR(data))
return PTR_ERR(data);
rv = write_op[ino](file, data, size);
if (rv > 0) {
simple_transaction_set(file, rv);
rv = size;
}
return rv;
}
static const struct file_operations transaction_ops = {
.write = selinux_transaction_write,
.read = simple_transaction_read,
.release = simple_transaction_release,
.llseek = generic_file_llseek,
};
/*
* payload - write methods
* If the method has a response, the response should be put in buf,
* and the length returned. Otherwise return 0 or and -error.
*/
static ssize_t sel_write_access(struct file *file, char *buf, size_t size)
{
char *scon = NULL, *tcon = NULL;
u32 ssid, tsid;
u16 tclass;
struct av_decision avd;
ssize_t length;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__COMPUTE_AV, NULL);
if (length)
goto out;
length = -ENOMEM;
scon = kzalloc(size + 1, GFP_KERNEL);
if (!scon)
goto out;
length = -ENOMEM;
tcon = kzalloc(size + 1, GFP_KERNEL);
if (!tcon)
goto out;
length = -EINVAL;
if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3)
goto out;
length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL);
if (length)
goto out;
length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL);
if (length)
goto out;
security_compute_av_user(ssid, tsid, tclass, &avd);
length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT,
2009-04-01 09:07:57 +08:00
"%x %x %x %x %u %x",
avd.allowed, 0xffffffff,
avd.auditallow, avd.auditdeny,
2009-04-01 09:07:57 +08:00
avd.seqno, avd.flags);
out:
kfree(tcon);
kfree(scon);
return length;
}
static ssize_t sel_write_create(struct file *file, char *buf, size_t size)
{
char *scon = NULL, *tcon = NULL;
char *namebuf = NULL, *objname = NULL;
u32 ssid, tsid, newsid;
u16 tclass;
ssize_t length;
char *newcon = NULL;
u32 len;
int nargs;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE,
NULL);
if (length)
goto out;
length = -ENOMEM;
scon = kzalloc(size + 1, GFP_KERNEL);
if (!scon)
goto out;
length = -ENOMEM;
tcon = kzalloc(size + 1, GFP_KERNEL);
if (!tcon)
goto out;
length = -ENOMEM;
namebuf = kzalloc(size + 1, GFP_KERNEL);
if (!namebuf)
goto out;
length = -EINVAL;
nargs = sscanf(buf, "%s %s %hu %s", scon, tcon, &tclass, namebuf);
if (nargs < 3 || nargs > 4)
goto out;
if (nargs == 4) {
/*
* If and when the name of new object to be queried contains
* either whitespace or multibyte characters, they shall be
* encoded based on the percentage-encoding rule.
* If not encoded, the sscanf logic picks up only left-half
* of the supplied name; split by a whitespace unexpectedly.
*/
char *r, *w;
int c1, c2;
r = w = namebuf;
do {
c1 = *r++;
if (c1 == '+')
c1 = ' ';
else if (c1 == '%') {
c1 = hex_to_bin(*r++);
if (c1 < 0)
goto out;
c2 = hex_to_bin(*r++);
if (c2 < 0)
goto out;
c1 = (c1 << 4) | c2;
}
*w++ = c1;
} while (c1 != '\0');
objname = namebuf;
}
length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL);
if (length)
goto out;
length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL);
if (length)
goto out;
length = security_transition_sid_user(ssid, tsid, tclass,
objname, &newsid);
if (length)
goto out;
length = security_sid_to_context(newsid, &newcon, &len);
if (length)
goto out;
length = -ERANGE;
if (len > SIMPLE_TRANSACTION_LIMIT) {
pr_err("SELinux: %s: context size (%u) exceeds "
"payload max\n", __func__, len);
goto out;
}
memcpy(buf, newcon, len);
length = len;
out:
kfree(newcon);
kfree(namebuf);
kfree(tcon);
kfree(scon);
return length;
}
static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size)
{
char *scon = NULL, *tcon = NULL;
u32 ssid, tsid, newsid;
u16 tclass;
ssize_t length;
char *newcon = NULL;
u32 len;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__COMPUTE_RELABEL,
NULL);
if (length)
goto out;
length = -ENOMEM;
scon = kzalloc(size + 1, GFP_KERNEL);
if (!scon)
goto out;
length = -ENOMEM;
tcon = kzalloc(size + 1, GFP_KERNEL);
if (!tcon)
goto out;
length = -EINVAL;
if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3)
goto out;
length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL);
if (length)
goto out;
length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL);
if (length)
goto out;
length = security_change_sid(ssid, tsid, tclass, &newsid);
if (length)
goto out;
length = security_sid_to_context(newsid, &newcon, &len);
if (length)
goto out;
length = -ERANGE;
if (len > SIMPLE_TRANSACTION_LIMIT)
goto out;
memcpy(buf, newcon, len);
length = len;
out:
kfree(newcon);
kfree(tcon);
kfree(scon);
return length;
}
static ssize_t sel_write_user(struct file *file, char *buf, size_t size)
{
char *con = NULL, *user = NULL, *ptr;
u32 sid, *sids = NULL;
ssize_t length;
char *newcon;
int rc;
u32 i, len, nsids;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__COMPUTE_USER,
NULL);
if (length)
goto out;
length = -ENOMEM;
con = kzalloc(size + 1, GFP_KERNEL);
if (!con)
goto out;
length = -ENOMEM;
user = kzalloc(size + 1, GFP_KERNEL);
if (!user)
goto out;
length = -EINVAL;
if (sscanf(buf, "%s %s", con, user) != 2)
goto out;
length = security_context_str_to_sid(con, &sid, GFP_KERNEL);
if (length)
goto out;
length = security_get_user_sids(sid, user, &sids, &nsids);
if (length)
goto out;
length = sprintf(buf, "%u", nsids) + 1;
ptr = buf + length;
for (i = 0; i < nsids; i++) {
rc = security_sid_to_context(sids[i], &newcon, &len);
if (rc) {
length = rc;
goto out;
}
if ((length + len) >= SIMPLE_TRANSACTION_LIMIT) {
kfree(newcon);
length = -ERANGE;
goto out;
}
memcpy(ptr, newcon, len);
kfree(newcon);
ptr += len;
length += len;
}
out:
kfree(sids);
kfree(user);
kfree(con);
return length;
}
static ssize_t sel_write_member(struct file *file, char *buf, size_t size)
{
char *scon = NULL, *tcon = NULL;
u32 ssid, tsid, newsid;
u16 tclass;
ssize_t length;
char *newcon = NULL;
u32 len;
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER,
NULL);
if (length)
goto out;
length = -ENOMEM;
scon = kzalloc(size + 1, GFP_KERNEL);
if (!scon)
goto out;
length = -ENOMEM;
tcon = kzalloc(size + 1, GFP_KERNEL);
if (!tcon)
goto out;
length = -EINVAL;
if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3)
goto out;
length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL);
if (length)
goto out;
length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL);
if (length)
goto out;
length = security_member_sid(ssid, tsid, tclass, &newsid);
if (length)
goto out;
length = security_sid_to_context(newsid, &newcon, &len);
if (length)
goto out;
length = -ERANGE;
if (len > SIMPLE_TRANSACTION_LIMIT) {
pr_err("SELinux: %s: context size (%u) exceeds "
"payload max\n", __func__, len);
goto out;
}
memcpy(buf, newcon, len);
length = len;
out:
kfree(newcon);
kfree(tcon);
kfree(scon);
return length;
}
static struct inode *sel_make_inode(struct super_block *sb, umode_t mode)
{
struct inode *ret = new_inode(sb);
if (ret) {
ret->i_mode = mode;
simple_inode_init_ts(ret);
}
return ret;
}
static ssize_t sel_read_bool(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
{
struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info;
char *page = NULL;
ssize_t length;
ssize_t ret;
int cur_enforcing;
unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
const char *name = filep->f_path.dentry->d_name.name;
mutex_lock(&selinux_state.policy_mutex);
ret = -EINVAL;
if (index >= fsi->bool_num || strcmp(name,
fsi->bool_pending_names[index]))
goto out_unlock;
ret = -ENOMEM;
page = (char *)get_zeroed_page(GFP_KERNEL);
if (!page)
goto out_unlock;
cur_enforcing = security_get_bool_value(index);
if (cur_enforcing < 0) {
ret = cur_enforcing;
goto out_unlock;
}
length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing,
fsi->bool_pending_values[index]);
mutex_unlock(&selinux_state.policy_mutex);
ret = simple_read_from_buffer(buf, count, ppos, page, length);
out_free:
free_page((unsigned long)page);
return ret;
out_unlock:
mutex_unlock(&selinux_state.policy_mutex);
goto out_free;
}
static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
size_t count, loff_t *ppos)
{
struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info;
char *page = NULL;
ssize_t length;
int new_value;
unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
const char *name = filep->f_path.dentry->d_name.name;
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
mutex_lock(&selinux_state.policy_mutex);
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__SETBOOL,
NULL);
if (length)
goto out;
length = -EINVAL;
if (index >= fsi->bool_num || strcmp(name,
fsi->bool_pending_names[index]))
goto out;
length = -EINVAL;
if (sscanf(page, "%d", &new_value) != 1)
goto out;
if (new_value)
new_value = 1;
fsi->bool_pending_values[index] = new_value;
length = count;
out:
mutex_unlock(&selinux_state.policy_mutex);
kfree(page);
return length;
}
static const struct file_operations sel_bool_ops = {
.read = sel_read_bool,
.write = sel_write_bool,
.llseek = generic_file_llseek,
};
static ssize_t sel_commit_bools_write(struct file *filep,
const char __user *buf,
size_t count, loff_t *ppos)
{
struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info;
char *page = NULL;
ssize_t length;
int new_value;
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
mutex_lock(&selinux_state.policy_mutex);
length = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__SETBOOL,
NULL);
if (length)
goto out;
length = -EINVAL;
if (sscanf(page, "%d", &new_value) != 1)
goto out;
length = 0;
if (new_value && fsi->bool_pending_values)
length = security_set_bools(fsi->bool_num,
fsi->bool_pending_values);
if (!length)
length = count;
out:
mutex_unlock(&selinux_state.policy_mutex);
kfree(page);
return length;
}
static const struct file_operations sel_commit_bools_ops = {
.write = sel_commit_bools_write,
.llseek = generic_file_llseek,
};
static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_dir,
unsigned int *bool_num, char ***bool_pending_names,
int **bool_pending_values)
{
int ret;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
char **names, *page;
u32 i, num;
page = (char *)get_zeroed_page(GFP_KERNEL);
if (!page)
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
return -ENOMEM;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
ret = security_get_bools(newpolicy, &num, &names, bool_pending_values);
if (ret)
goto out;
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
*bool_num = num;
*bool_pending_names = names;
for (i = 0; i < num; i++) {
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
struct dentry *dentry;
struct inode *inode;
struct inode_security_struct *isec;
ssize_t len;
u32 sid;
len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]);
if (len >= PAGE_SIZE) {
ret = -ENAMETOOLONG;
break;
}
dentry = d_alloc_name(bool_dir, names[i]);
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
if (!dentry) {
ret = -ENOMEM;
break;
}
inode = sel_make_inode(bool_dir->d_sb, S_IFREG | S_IRUGO | S_IWUSR);
if (!inode) {
dput(dentry);
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
ret = -ENOMEM;
break;
}
isec = selinux_inode(inode);
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
ret = selinux_policy_genfs_sid(newpolicy, "selinuxfs", page,
SECCLASS_FILE, &sid);
if (ret) {
pr_warn_ratelimited("SELinux: no sid found, defaulting to security isid for %s\n",
page);
sid = SECINITSID_SECURITY;
}
isec->sid = sid;
isec->initialized = LABEL_INITIALIZED;
inode->i_fop = &sel_bool_ops;
inode->i_ino = i|SEL_BOOL_INO_OFFSET;
d_add(dentry, inode);
}
out:
free_page((unsigned long)page);
return ret;
}
static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char tmpbuf[TMPBUFLEN];
ssize_t length;
length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
avc_get_cache_threshold());
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static ssize_t sel_write_avc_cache_threshold(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
char *page;
ssize_t ret;
unsigned int new_value;
ret = avc_has_perm(current_sid(), SECINITSID_SECURITY,
SECCLASS_SECURITY, SECURITY__SETSECPARAM,
NULL);
if (ret)
return ret;
if (count >= PAGE_SIZE)
return -ENOMEM;
/* No partial writes. */
if (*ppos != 0)
return -EINVAL;
page = memdup_user_nul(buf, count);
if (IS_ERR(page))
return PTR_ERR(page);
ret = -EINVAL;
if (sscanf(page, "%u", &new_value) != 1)
goto out;
avc_set_cache_threshold(new_value);
ret = count;
out:
kfree(page);
return ret;
}
static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char *page;
ssize_t length;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
length = avc_get_hash_stats(page);
if (length >= 0)
length = simple_read_from_buffer(buf, count, ppos, page, length);
free_page((unsigned long)page);
return length;
}
selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep <jeffv@google.com> Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Reported-by: Jovana Knezevic <jovanak@google.com> Reviewed-by: Stephen Smalley <sds@tycho.nsa.gov> Tested-by: Stephen Smalley <sds@tycho.nsa.gov> [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-22 17:33:06 +08:00
static ssize_t sel_read_sidtab_hash_stats(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
char *page;
ssize_t length;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
length = security_sidtab_hash_stats(page);
selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep <jeffv@google.com> Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Reported-by: Jovana Knezevic <jovanak@google.com> Reviewed-by: Stephen Smalley <sds@tycho.nsa.gov> Tested-by: Stephen Smalley <sds@tycho.nsa.gov> [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-22 17:33:06 +08:00
if (length >= 0)
length = simple_read_from_buffer(buf, count, ppos, page,
length);
free_page((unsigned long)page);
return length;
}
static const struct file_operations sel_sidtab_hash_stats_ops = {
.read = sel_read_sidtab_hash_stats,
.llseek = generic_file_llseek,
};
static const struct file_operations sel_avc_cache_threshold_ops = {
.read = sel_read_avc_cache_threshold,
.write = sel_write_avc_cache_threshold,
.llseek = generic_file_llseek,
};
static const struct file_operations sel_avc_hash_stats_ops = {
.read = sel_read_avc_hash_stats,
.llseek = generic_file_llseek,
};
#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
static struct avc_cache_stats *sel_avc_get_stat_idx(loff_t *idx)
{
int cpu;
for (cpu = *idx; cpu < nr_cpu_ids; ++cpu) {
if (!cpu_possible(cpu))
continue;
*idx = cpu + 1;
return &per_cpu(avc_cache_stats, cpu);
}
(*idx)++;
return NULL;
}
static void *sel_avc_stats_seq_start(struct seq_file *seq, loff_t *pos)
{
loff_t n = *pos - 1;
if (*pos == 0)
return SEQ_START_TOKEN;
return sel_avc_get_stat_idx(&n);
}
static void *sel_avc_stats_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
return sel_avc_get_stat_idx(pos);
}
static int sel_avc_stats_seq_show(struct seq_file *seq, void *v)
{
struct avc_cache_stats *st = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"lookups hits misses allocations reclaims frees\n");
} else {
unsigned int lookups = st->lookups;
unsigned int misses = st->misses;
unsigned int hits = lookups - misses;
seq_printf(seq, "%u %u %u %u %u %u\n", lookups,
hits, misses, st->allocations,
st->reclaims, st->frees);
}
return 0;
}
static void sel_avc_stats_seq_stop(struct seq_file *seq, void *v)
{ }
static const struct seq_operations sel_avc_cache_stats_seq_ops = {
.start = sel_avc_stats_seq_start,
.next = sel_avc_stats_seq_next,
.show = sel_avc_stats_seq_show,
.stop = sel_avc_stats_seq_stop,
};
static int sel_open_avc_cache_stats(struct inode *inode, struct file *file)
{
return seq_open(file, &sel_avc_cache_stats_seq_ops);
}
static const struct file_operations sel_avc_cache_stats_ops = {
.open = sel_open_avc_cache_stats,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif
static int sel_make_avc_files(struct dentry *dir)
{
struct super_block *sb = dir->d_sb;
struct selinux_fs_info *fsi = sb->s_fs_info;
unsigned int i;
static const struct tree_descr files[] = {
{ "cache_threshold",
&sel_avc_cache_threshold_ops, S_IRUGO|S_IWUSR },
{ "hash_stats", &sel_avc_hash_stats_ops, S_IRUGO },
#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
{ "cache_stats", &sel_avc_cache_stats_ops, S_IRUGO },
#endif
};
for (i = 0; i < ARRAY_SIZE(files); i++) {
struct inode *inode;
struct dentry *dentry;
dentry = d_alloc_name(dir, files[i].name);
if (!dentry)
return -ENOMEM;
inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode);
if (!inode) {
dput(dentry);
return -ENOMEM;
}
inode->i_fop = files[i].ops;
inode->i_ino = ++fsi->last_ino;
d_add(dentry, inode);
}
return 0;
}
selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep <jeffv@google.com> Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Reported-by: Jovana Knezevic <jovanak@google.com> Reviewed-by: Stephen Smalley <sds@tycho.nsa.gov> Tested-by: Stephen Smalley <sds@tycho.nsa.gov> [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-22 17:33:06 +08:00
static int sel_make_ss_files(struct dentry *dir)
{
struct super_block *sb = dir->d_sb;
struct selinux_fs_info *fsi = sb->s_fs_info;
unsigned int i;
static const struct tree_descr files[] = {
selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep <jeffv@google.com> Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Reported-by: Jovana Knezevic <jovanak@google.com> Reviewed-by: Stephen Smalley <sds@tycho.nsa.gov> Tested-by: Stephen Smalley <sds@tycho.nsa.gov> [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-22 17:33:06 +08:00
{ "sidtab_hash_stats", &sel_sidtab_hash_stats_ops, S_IRUGO },
};
for (i = 0; i < ARRAY_SIZE(files); i++) {
struct inode *inode;
struct dentry *dentry;
dentry = d_alloc_name(dir, files[i].name);
if (!dentry)
return -ENOMEM;
inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode);
if (!inode) {
dput(dentry);
return -ENOMEM;
}
inode->i_fop = files[i].ops;
inode->i_ino = ++fsi->last_ino;
d_add(dentry, inode);
}
return 0;
}
static ssize_t sel_read_initcon(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
char *con;
u32 sid, len;
ssize_t ret;
sid = file_inode(file)->i_ino&SEL_INO_MASK;
ret = security_sid_to_context(sid, &con, &len);
if (ret)
return ret;
ret = simple_read_from_buffer(buf, count, ppos, con, len);
kfree(con);
return ret;
}
static const struct file_operations sel_initcon_ops = {
.read = sel_read_initcon,
.llseek = generic_file_llseek,
};
static int sel_make_initcon_files(struct dentry *dir)
{
unsigned int i;
for (i = 1; i <= SECINITSID_NUM; i++) {
struct inode *inode;
struct dentry *dentry;
selinux: remove unused initial SIDs and improve handling Remove initial SIDs that have never been used or are no longer used by the kernel from its string table, which is also used to generate the SECINITSID_* symbols referenced in code. Update the code to gracefully handle the fact that these can now be NULL. Stop treating it as an error if a policy defines additional initial SIDs unknown to the kernel. Do not load unused initial SID contexts into the sidtab. Fix the incorrect usage of the name from the ocontext in error messages when loading initial SIDs since these are not presently written to the kernel policy and are therefore always NULL. After this change, it is possible to safely reclaim and reuse some of the unused initial SIDs without compatibility issues. Specifically, unused initial SIDs that were being assigned the same context as the unlabeled initial SID in policies can be reclaimed and reused for another purpose, with existing policies still treating them as having the unlabeled context and future policies having the option of mapping them to a more specific context. For example, this could have been used when the infiniband labeling support was introduced to define initial SIDs for the default pkey and endport SIDs similar to the handling of port/netif/node SIDs rather than always using SECINITSID_UNLABELED as the default. The set of safely reclaimable unused initial SIDs across all known policies is igmp_packet (13), icmp_socket (14), tcp_socket (15), kmod (24), policy (25), and scmp_packet (26); these initial SIDs were assigned the same context as unlabeled in all known policies including mls. If only considering non-mls policies (i.e. assuming that mls users always upgrade policy with their kernels), the set of safely reclaimable unused initial SIDs further includes file_labels (6), init (7), sysctl_modprobe (16), and sysctl_fs (18) through sysctl_dev (23). Adding new initial SIDs beyond SECINITSID_NUM to policy unfortunately became a fatal error in commit 24ed7fdae669 ("selinux: use separate table for initial SID lookup") and even before that it could cause problems on a policy reload (collision between the new initial SID and one allocated at runtime) ever since commit 42596eafdd75 ("selinux: load the initial SIDs upon every policy load") so we cannot safely start adding new initial SIDs to policies beyond SECINITSID_NUM (27) until such a time as all such kernels do not need to be supported and only those that include this commit are relevant. That is not a big deal since we haven't added a new initial SID since 2004 (v2.6.7) and we have plenty of unused ones we can reclaim if we truly need one. If we want to avoid the wasted storage in initial_sid_to_string[] and/or sidtab->isids[] for the unused initial SIDs, we could introduce an indirection between the kernel initial SID values and the policy initial SID values and just map the policy SID values in the ocontexts to the kernel values during policy_load_isids(). Originally I thought we'd do this by preserving the initial SID names in the kernel policy and creating a mapping at load time like we do for the security classes and permissions but that would require a new kernel policy format version and associated changes to libsepol/checkpolicy and I'm not sure it is justified. Simpler approach is just to create a fixed mapping table in the kernel from the existing fixed policy values to the kernel values. Less flexible but probably sufficient. A separate selinux userspace change was applied in https://github.com/SELinuxProject/selinux/commit/8677ce5e8f592950ae6f14cea1b68a20ddc1ac25 to enable removal of most of the unused initial SID contexts from policies, but there is no dependency between that change and this one. That change permits removing all of the unused initial SID contexts from policy except for the fs and sysctl SID contexts. The initial SID declarations themselves would remain in policy to preserve the values of subsequent ones but the contexts can be dropped. If/when the kernel decides to reuse one of them, future policies can change the name and start assigning a context again without breaking compatibility. Here is how I would envision staging changes to the initial SIDs in a compatible manner after this commit is applied: 1. At any time after this commit is applied, the kernel could choose to reclaim one of the safely reclaimable unused initial SIDs listed above for a new purpose (i.e. replace its NULL entry in the initial_sid_to_string[] table with a new name and start using the newly generated SECINITSID_name symbol in code), and refpolicy could at that time rename its declaration of that initial SID to reflect its new purpose and start assigning it a context going forward. Existing/old policies would map the reclaimed initial SID to the unlabeled context, so that would be the initial default behavior until policies are updated. This doesn't depend on the selinux userspace change; it will work with existing policies and userspace. 2. In 6 months or so we'll have another SELinux userspace release that will include the libsepol/checkpolicy support for omitting unused initial SID contexts. 3. At any time after that release, refpolicy can make that release its minimum build requirement and drop the sid context statements (but not the sid declarations) for all of the unused initial SIDs except for fs and sysctl, which must remain for compatibility on policy reload with old kernels and for compatibility with kernels that were still using SECINITSID_SYSCTL (< 2.6.39). This doesn't depend on this kernel commit; it will work with previous kernels as well. 4. After N years for some value of N, refpolicy decides that it no longer cares about policy reload compatibility for kernels that predate this kernel commit, and refpolicy drops the fs and sysctl SID contexts from policy too (but retains the declarations). 5. After M years for some value of M, the kernel decides that it no longer cares about compatibility with refpolicies that predate step 4 (dropping the fs and sysctl SIDs), and those two SIDs also become safely reclaimable. This step is optional and need not ever occur unless we decide that the need to reclaim those two SIDs outweighs the compatibility cost. 6. After O years for some value of O, refpolicy decides that it no longer cares about policy load (not just reload) compatibility for kernels that predate this kernel commit, and both kernel and refpolicy can then start adding and using new initial SIDs beyond 27. This does not depend on the previous change (step 5) and can occur independent of it. Fixes: https://github.com/SELinuxProject/selinux-kernel/issues/12 Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-02-25 00:10:23 +08:00
const char *s = security_get_initial_sid_context(i);
if (!s)
continue;
dentry = d_alloc_name(dir, s);
if (!dentry)
return -ENOMEM;
inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
if (!inode) {
dput(dentry);
return -ENOMEM;
}
inode->i_fop = &sel_initcon_ops;
inode->i_ino = i|SEL_INITCON_INO_OFFSET;
d_add(dentry, inode);
}
return 0;
}
static inline unsigned long sel_class_to_ino(u16 class)
{
return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET;
}
static inline u16 sel_ino_to_class(unsigned long ino)
{
return (ino & SEL_INO_MASK) / (SEL_VEC_MAX + 1);
}
static inline unsigned long sel_perm_to_ino(u16 class, u32 perm)
{
return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
}
static inline u32 sel_ino_to_perm(unsigned long ino)
{
return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
}
static ssize_t sel_read_class(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long ino = file_inode(file)->i_ino;
char res[TMPBUFLEN];
ssize_t len = scnprintf(res, sizeof(res), "%d", sel_ino_to_class(ino));
return simple_read_from_buffer(buf, count, ppos, res, len);
}
static const struct file_operations sel_class_ops = {
.read = sel_read_class,
.llseek = generic_file_llseek,
};
static ssize_t sel_read_perm(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long ino = file_inode(file)->i_ino;
char res[TMPBUFLEN];
ssize_t len = scnprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino));
return simple_read_from_buffer(buf, count, ppos, res, len);
}
static const struct file_operations sel_perm_ops = {
.read = sel_read_perm,
.llseek = generic_file_llseek,
};
static ssize_t sel_read_policycap(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
int value;
char tmpbuf[TMPBUFLEN];
ssize_t length;
unsigned long i_ino = file_inode(file)->i_ino;
value = security_policycap_supported(i_ino & SEL_INO_MASK);
length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value);
return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
}
static const struct file_operations sel_policycap_ops = {
.read = sel_read_policycap,
.llseek = generic_file_llseek,
};
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
static int sel_make_perm_files(struct selinux_policy *newpolicy,
char *objclass, int classvalue,
struct dentry *dir)
{
u32 i, nperms;
int rc;
char **perms;
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
rc = security_get_permissions(newpolicy, objclass, &perms, &nperms);
if (rc)
return rc;
for (i = 0; i < nperms; i++) {
struct inode *inode;
struct dentry *dentry;
rc = -ENOMEM;
dentry = d_alloc_name(dir, perms[i]);
if (!dentry)
goto out;
rc = -ENOMEM;
inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
if (!inode) {
dput(dentry);
goto out;
}
inode->i_fop = &sel_perm_ops;
/* i+1 since perm values are 1-indexed */
inode->i_ino = sel_perm_to_ino(classvalue, i + 1);
d_add(dentry, inode);
}
rc = 0;
out:
for (i = 0; i < nperms; i++)
kfree(perms[i]);
kfree(perms);
return rc;
}
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
static int sel_make_class_dir_entries(struct selinux_policy *newpolicy,
char *classname, int index,
struct dentry *dir)
{
struct super_block *sb = dir->d_sb;
struct selinux_fs_info *fsi = sb->s_fs_info;
struct dentry *dentry = NULL;
struct inode *inode = NULL;
dentry = d_alloc_name(dir, "index");
if (!dentry)
return -ENOMEM;
inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
if (!inode) {
dput(dentry);
return -ENOMEM;
}
inode->i_fop = &sel_class_ops;
inode->i_ino = sel_class_to_ino(index);
d_add(dentry, inode);
dentry = sel_make_dir(dir, "perms", &fsi->last_class_ino);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
return sel_make_perm_files(newpolicy, classname, index, dentry);
}
static int sel_make_classes(struct selinux_policy *newpolicy,
struct dentry *class_dir,
unsigned long *last_class_ino)
{
u32 i, nclasses;
int rc;
char **classes;
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
rc = security_get_classes(newpolicy, &classes, &nclasses);
if (rc)
return rc;
/* +2 since classes are 1-indexed */
*last_class_ino = sel_class_to_ino(nclasses + 2);
for (i = 0; i < nclasses; i++) {
struct dentry *class_name_dir;
class_name_dir = sel_make_dir(class_dir, classes[i],
last_class_ino);
if (IS_ERR(class_name_dir)) {
rc = PTR_ERR(class_name_dir);
goto out;
}
/* i+1 since class values are 1-indexed */
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
rc = sel_make_class_dir_entries(newpolicy, classes[i], i + 1,
class_name_dir);
if (rc)
goto out;
}
rc = 0;
out:
for (i = 0; i < nclasses; i++)
kfree(classes[i]);
kfree(classes);
return rc;
}
static int sel_make_policycap(struct selinux_fs_info *fsi)
{
unsigned int iter;
struct dentry *dentry = NULL;
struct inode *inode = NULL;
for (iter = 0; iter <= POLICYDB_CAP_MAX; iter++) {
if (iter < ARRAY_SIZE(selinux_policycap_names))
dentry = d_alloc_name(fsi->policycap_dir,
selinux_policycap_names[iter]);
else
dentry = d_alloc_name(fsi->policycap_dir, "unknown");
if (dentry == NULL)
return -ENOMEM;
inode = sel_make_inode(fsi->sb, S_IFREG | 0444);
if (inode == NULL) {
dput(dentry);
return -ENOMEM;
}
inode->i_fop = &sel_policycap_ops;
inode->i_ino = iter | SEL_POLICYCAP_INO_OFFSET;
d_add(dentry, inode);
}
return 0;
}
static struct dentry *sel_make_dir(struct dentry *dir, const char *name,
unsigned long *ino)
{
struct dentry *dentry = d_alloc_name(dir, name);
struct inode *inode;
if (!dentry)
return ERR_PTR(-ENOMEM);
inode = sel_make_inode(dir->d_sb, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode) {
dput(dentry);
return ERR_PTR(-ENOMEM);
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_ino = ++(*ino);
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_add(dentry, inode);
/* bump link count on parent directory, too */
inc_nlink(d_inode(dir));
return dentry;
}
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
static int reject_all(struct mnt_idmap *idmap, struct inode *inode, int mask)
{
return -EPERM; // no access for anyone, root or no root.
}
static const struct inode_operations swapover_dir_inode_operations = {
.lookup = simple_lookup,
.permission = reject_all,
};
static struct dentry *sel_make_swapover_dir(struct super_block *sb,
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
unsigned long *ino)
{
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
struct dentry *dentry = d_alloc_name(sb->s_root, ".swapover");
struct inode *inode;
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
if (!dentry)
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
return ERR_PTR(-ENOMEM);
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
inode = sel_make_inode(sb, S_IFDIR);
if (!inode) {
dput(dentry);
return ERR_PTR(-ENOMEM);
}
inode->i_op = &swapover_dir_inode_operations;
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
inode->i_ino = ++(*ino);
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
selinux: saner handling of policy reloads On policy reload selinuxfs replaces two subdirectories (/booleans and /class) with new variants. Unfortunately, that's done with serious abuses of directory locking. 1) lock_rename() should be done to parents, not to objects being exchanged 2) there's a bunch of reasons why it should not be done for directories that do not have a common ancestor; most of those do not apply to selinuxfs, but even in the best case the proof is subtle and brittle. 3) failure halfway through the creation of /class will leak names and values arrays. 4) use of d_genocide() is also rather brittle; it's probably not much of a bug per se, but e.g. an overmount of /sys/fs/selinuxfs/classes/shm/index with any regular file will end up with leaked mount on policy reload. Sure, don't do it, but... Let's stop messing with disconnected directories; just create a temporary (/.swapover) with no permissions for anyone (on the level of ->permission() returing -EPERM, no matter who's calling it) and build the new /booleans and /class in there; then lock_rename on root and that temporary directory and d_exchange() old and new both for class and booleans. Then unlock and use simple_recursive_removal() to take the temporary out; it's much more robust. And instead of bothering with separate pathways for freeing new (on failure halfway through) and old (on success) names/values, do all freeing in one place. With temporaries swapped with the old ones when we are past all possible failures. The only user-visible difference is that /.swapover shows up (but isn't possible to open, look up into, etc.) for the duration of policy reload. Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> [PM: applied some fixes from Al post merge] Signed-off-by: Paul Moore <paul@paul-moore.com>
2023-10-17 06:08:35 +08:00
inode_lock(sb->s_root->d_inode);
d_add(dentry, inode);
inc_nlink(sb->s_root->d_inode);
inode_unlock(sb->s_root->d_inode);
return dentry;
selinux: Create new booleans and class dirs out of tree In order to avoid concurrency issues around selinuxfs resource availability during policy load, we first create new directories out of tree for reloaded resources, then swap them in, and finally delete the old versions. This fix focuses on concurrency in each of the two subtrees swapped, and not concurrency between the trees. This means that it is still possible that subsequent reads to eg the booleans directory and the class directory during a policy load could see the old state for one and the new for the other. The problem of ensuring that policy loads are fully atomic from the perspective of userspace is larger than what is dealt with here. This commit focuses on ensuring that the directories contents always match either the new or the old policy state from the perspective of userspace. In the previous implementation, on policy load /sys/fs/selinux is updated by deleting the previous contents of /sys/fs/selinux/{class,booleans} and then recreating them. This means that there is a period of time when the contents of these directories do not exist which can cause race conditions as userspace relies on them for information about the policy. In addition, it means that error recovery in the event of failure is challenging. In order to demonstrate the race condition that this series fixes, you can use the following commands: while true; do cat /sys/fs/selinux/class/service/perms/status >/dev/null; done & while true; do load_policy; done; In the existing code, this will display errors fairly often as the class lookup fails. (In normal operation from systemd, this would result in a permission check which would be allowed or denied based on policy settings around unknown object classes.) After applying this patch series you should expect to no longer see such error messages. Signed-off-by: Daniel Burgener <dburgener@linux.microsoft.com> Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-20 03:59:35 +08:00
}
#define NULL_FILE_NAME "null"
static int sel_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct selinux_fs_info *fsi;
int ret;
struct dentry *dentry;
struct inode *inode;
struct inode_security_struct *isec;
static const struct tree_descr selinux_files[] = {
[SEL_LOAD] = {"load", &sel_load_ops, S_IRUSR|S_IWUSR},
[SEL_ENFORCE] = {"enforce", &sel_enforce_ops, S_IRUGO|S_IWUSR},
[SEL_CONTEXT] = {"context", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_ACCESS] = {"access", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_CREATE] = {"create", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_RELABEL] = {"relabel", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_USER] = {"user", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_POLICYVERS] = {"policyvers", &sel_policyvers_ops, S_IRUGO},
[SEL_COMMIT_BOOLS] = {"commit_pending_bools", &sel_commit_bools_ops, S_IWUSR},
[SEL_MLS] = {"mls", &sel_mls_ops, S_IRUGO},
[SEL_DISABLE] = {"disable", &sel_disable_ops, S_IWUSR},
[SEL_MEMBER] = {"member", &transaction_ops, S_IRUGO|S_IWUGO},
[SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR},
[SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO},
[SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO},
selinux: fast status update interface (/selinux/status) This patch provides a new /selinux/status entry which allows applications read-only mmap(2). This region reflects selinux_kernel_status structure in kernel space. struct selinux_kernel_status { u32 length; /* length of this structure */ u32 sequence; /* sequence number of seqlock logic */ u32 enforcing; /* current setting of enforcing mode */ u32 policyload; /* times of policy reloaded */ u32 deny_unknown; /* current setting of deny_unknown */ }; When userspace object manager caches access control decisions provided by SELinux, it needs to invalidate the cache on policy reload and setenforce to keep consistency. However, the applications need to check the kernel state for each accesses on userspace avc, or launch a background worker process. In heuristic, frequency of invalidation is much less than frequency of making access control decision, so it is annoying to invoke a system call to check we don't need to invalidate the userspace cache. If we can use a background worker thread, it allows to receive invalidation messages from the kernel. But it requires us an invasive coding toward the base application in some cases; E.g, when we provide a feature performing with SELinux as a plugin module, it is unwelcome manner to launch its own worker thread from the module. If we could map /selinux/status to process memory space, application can know updates of selinux status; policy reload or setenforce. A typical application checks selinux_kernel_status::sequence when it tries to reference userspace avc. If it was changed from the last time when it checked userspace avc, it means something was updated in the kernel space. Then, the application can reset userspace avc or update current enforcing mode, without any system call invocations. This sequence number is updated according to the seqlock logic, so we need to wait for a while if it is odd number. Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com> Acked-by: Eric Paris <eparis@redhat.com> -- security/selinux/include/security.h | 21 ++++++ security/selinux/selinuxfs.c | 56 +++++++++++++++ security/selinux/ss/Makefile | 2 +- security/selinux/ss/services.c | 3 + security/selinux/ss/status.c | 129 +++++++++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+), 1 deletions(-) Signed-off-by: James Morris <jmorris@namei.org>
2010-09-14 17:28:39 +08:00
[SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO},
[SEL_POLICY] = {"policy", &sel_policy_ops, S_IRUGO},
[SEL_VALIDATE_TRANS] = {"validatetrans", &sel_transition_ops,
S_IWUGO},
/* last one */ {""}
};
ret = selinux_fs_info_create(sb);
if (ret)
goto err;
ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
if (ret)
goto err;
fsi = sb->s_fs_info;
fsi->bool_dir = sel_make_dir(sb->s_root, BOOL_DIR_NAME, &fsi->last_ino);
if (IS_ERR(fsi->bool_dir)) {
ret = PTR_ERR(fsi->bool_dir);
fsi->bool_dir = NULL;
goto err;
}
ret = -ENOMEM;
dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME);
if (!dentry)
goto err;
ret = -ENOMEM;
inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO);
if (!inode) {
dput(dentry);
goto err;
}
inode->i_ino = ++fsi->last_ino;
isec = selinux_inode(inode);
isec->sid = SECINITSID_DEVNULL;
isec->sclass = SECCLASS_CHR_FILE;
isec->initialized = LABEL_INITIALIZED;
init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3));
d_add(dentry, inode);
dentry = sel_make_dir(sb->s_root, "avc", &fsi->last_ino);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto err;
}
ret = sel_make_avc_files(dentry);
if (ret)
goto err;
selinux: sidtab reverse lookup hash table This replaces the reverse table lookup and reverse cache with a hashtable which improves cache-miss reverse-lookup times from O(n) to O(1)* and maintains the same performance as a reverse cache hit. This reduces the time needed to add a new sidtab entry from ~500us to 5us on a Pixel 3 when there are ~10,000 sidtab entries. The implementation uses the kernel's generic hashtable API, It uses the context's string represtation as the hash source, and the kernels generic string hashing algorithm full_name_hash() to reduce the string to a 32 bit value. This change also maintains the improvement introduced in commit ee1a84fdfeed ("selinux: overhaul sidtab to fix bug and improve performance") which removed the need to keep the current sidtab locked during policy reload. It does however introduce periodic locking of the target sidtab while converting the hashtable. Sidtab entries are never modified or removed, so the context struct stored in the sid_to_context tree can also be used for the context_to_sid hashtable to reduce memory usage. This bug was reported by: - On the selinux bug tracker. BUG: kernel softlockup due to too many SIDs/contexts #37 https://github.com/SELinuxProject/selinux-kernel/issues/37 - Jovana Knezevic on Android's bugtracker. Bug: 140252993 "During multi-user performance testing, we create and remove users many times. selinux_android_restorecon_pkgdir goes from 1ms to over 20ms after about 200 user creations and removals. Accumulated over ~280 packages, that adds a significant time to user creation, making perf benchmarks unreliable." * Hashtable lookup is only O(1) when n < the number of buckets. Signed-off-by: Jeff Vander Stoep <jeffv@google.com> Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Reported-by: Jovana Knezevic <jovanak@google.com> Reviewed-by: Stephen Smalley <sds@tycho.nsa.gov> Tested-by: Stephen Smalley <sds@tycho.nsa.gov> [PM: subj tweak, removed changelog from patch description] Signed-off-by: Paul Moore <paul@paul-moore.com>
2019-11-22 17:33:06 +08:00
dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto err;
}
ret = sel_make_ss_files(dentry);
if (ret)
goto err;
dentry = sel_make_dir(sb->s_root, "initial_contexts", &fsi->last_ino);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto err;
}
ret = sel_make_initcon_files(dentry);
if (ret)
goto err;
fsi->class_dir = sel_make_dir(sb->s_root, CLASS_DIR_NAME, &fsi->last_ino);
if (IS_ERR(fsi->class_dir)) {
ret = PTR_ERR(fsi->class_dir);
fsi->class_dir = NULL;
goto err;
}
fsi->policycap_dir = sel_make_dir(sb->s_root, POLICYCAP_DIR_NAME,
&fsi->last_ino);
if (IS_ERR(fsi->policycap_dir)) {
ret = PTR_ERR(fsi->policycap_dir);
fsi->policycap_dir = NULL;
goto err;
}
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
ret = sel_make_policycap(fsi);
if (ret) {
pr_err("SELinux: failed to load policy capabilities\n");
goto err;
selinux: move policy commit after updating selinuxfs With the refactoring of the policy load logic in the security server from the previous change, it is now possible to split out the committing of the new policy from security_load_policy() and perform it only after successful updating of selinuxfs. Change security_load_policy() to return the newly populated policy data structures to the caller, export selinux_policy_commit() for external callers, and introduce selinux_policy_cancel() to provide a way to cancel the policy load in the event of an error during updating of the selinuxfs directory tree. Further, rework the interfaces used by selinuxfs to get information from the policy when creating the new directory tree to take and act upon the new policy data structure rather than the current/active policy. Update selinuxfs to use these updated and new interfaces. While we are here, stop re-creating the policy_capabilities directory on each policy load since it does not depend on the policy, and stop trying to create the booleans and classes directories during the initial creation of selinuxfs since no information is available until first policy load. After this change, a failure while updating the booleans and class directories will cause the entire policy load to be canceled, leaving the original policy intact, and policy load notifications to userspace will only happen after a successful completion of updating those directories. This does not (yet) provide full atomicity with respect to the updating of the directory trees themselves. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
2020-08-07 21:29:34 +08:00
}
return 0;
err:
pr_err("SELinux: %s: failed while creating inodes\n",
__func__);
selinux_fs_info_free(sb);
return ret;
}
static int sel_get_tree(struct fs_context *fc)
{
return get_tree_single(fc, sel_fill_super);
}
static const struct fs_context_operations sel_context_ops = {
.get_tree = sel_get_tree,
};
static int sel_init_fs_context(struct fs_context *fc)
{
fc->ops = &sel_context_ops;
return 0;
}
static void sel_kill_sb(struct super_block *sb)
{
selinux_fs_info_free(sb);
kill_litter_super(sb);
}
static struct file_system_type sel_fs_type = {
.name = "selinuxfs",
.init_fs_context = sel_init_fs_context,
.kill_sb = sel_kill_sb,
};
struct path selinux_null __ro_after_init;
static int __init init_sel_fs(void)
{
struct qstr null_name = QSTR_INIT(NULL_FILE_NAME,
sizeof(NULL_FILE_NAME)-1);
int err;
if (!selinux_enabled_boot)
return 0;
err = sysfs_create_mount_point(fs_kobj, "selinux");
if (err)
return err;
err = register_filesystem(&sel_fs_type);
if (err) {
sysfs_remove_mount_point(fs_kobj, "selinux");
return err;
}
selinux_null.mnt = kern_mount(&sel_fs_type);
if (IS_ERR(selinux_null.mnt)) {
pr_err("selinuxfs: could not mount!\n");
err = PTR_ERR(selinux_null.mnt);
selinux_null.mnt = NULL;
return err;
}
selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
&null_name);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
selinux_null.dentry = NULL;
return err;
}
return err;
}
__initcall(init_sel_fs);