2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-30 08:04:13 +08:00
linux-next/net/core/scm.c
Eric Dumazet 16e5726269 af_unix: dont send SCM_CREDENTIALS by default
Since commit 7361c36c52 (af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.

This is because we now take a reference on pid and cred in each write(),
and release them in read(), usually done from another process,
eventually from another cpu. This triggers false sharing.

# Events: 154K cycles
#
# Overhead  Command       Shared Object        Symbol
# ........  .......  ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]   [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]   [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]   [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]   [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]   [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]   [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]   [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]   [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]   [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]   [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]   [k]
__mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]   [k] sock_alloc_send_pskb

This patch includes SCM_CREDENTIALS information in a af_unix message/skb
only if requested by the sender, [man 7 unix for details how to include
ancillary data using sendmsg() system call]

Note: This might break buggy applications that expected SCM_CREDENTIAL
from an unaware write() system call, and receiver not using SO_PASSCRED
socket option.

If SOCK_PASSCRED is set on source or destination socket, we still
include credentials for mere write() syscalls.

Performance boost in hackbench : more than 50% gain on a 16 thread
machine (2 quad-core cpus, 2 threads per core)

hackbench 20 thread 2000

4.228 sec instead of 9.102 sec

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-09-28 13:29:50 -04:00

347 lines
7.7 KiB
C

/* scm.c - Socket level control messages processing.
*
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Alignment and value checking mods by Craig Metz
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/security.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/compat.h>
#include <net/scm.h>
/*
* Only allow a user to send credentials, that they could set with
* setu(g)id.
*/
static __inline__ int scm_check_creds(struct ucred *creds)
{
const struct cred *cred = current_cred();
if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
((creds->uid == cred->uid || creds->uid == cred->euid ||
creds->uid == cred->suid) || capable(CAP_SETUID)) &&
((creds->gid == cred->gid || creds->gid == cred->egid ||
creds->gid == cred->sgid) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
}
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
int i, num;
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
if (num <= 0)
return 0;
if (num > SCM_MAX_FD)
return -EINVAL;
if (!fpl)
{
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
fpl->max = SCM_MAX_FD;
}
fpp = &fpl->fp[fpl->count];
if (fpl->count + num > fpl->max)
return -EINVAL;
/*
* Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
int fd = fdp[i];
struct file *file;
if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
}
return num;
}
void __scm_destroy(struct scm_cookie *scm)
{
struct scm_fp_list *fpl = scm->fp;
int i;
if (fpl) {
scm->fp = NULL;
if (current->scm_work_list) {
list_add_tail(&fpl->list, current->scm_work_list);
} else {
LIST_HEAD(work_list);
current->scm_work_list = &work_list;
list_add(&fpl->list, &work_list);
while (!list_empty(&work_list)) {
fpl = list_first_entry(&work_list, struct scm_fp_list, list);
list_del(&fpl->list);
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
kfree(fpl);
}
current->scm_work_list = NULL;
}
}
}
EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
struct cmsghdr *cmsg;
int err;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
err = -EINVAL;
/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
/* The first check was omitted in <= 2.2.5. The reasoning was
that parser checks cmsg_len in any case, so that
additional check would be work duplication.
But if cmsg_level is not SOL_SOCKET, we do not check
for too short ancillary data object at all! Oops.
OK, let's add it...
*/
if (!CMSG_OK(msg, cmsg))
goto error;
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
if (!sock->ops || sock->ops->family != PF_UNIX)
goto error;
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
goto error;
break;
case SCM_CREDENTIALS:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
err = scm_check_creds(&p->creds);
if (err)
goto error;
if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
struct pid *pid;
err = -ESRCH;
pid = find_get_pid(p->creds.pid);
if (!pid)
goto error;
put_pid(p->pid);
p->pid = pid;
}
if (!p->cred ||
(p->cred->euid != p->creds.uid) ||
(p->cred->egid != p->creds.gid)) {
struct cred *cred;
err = -ENOMEM;
cred = prepare_creds();
if (!cred)
goto error;
cred->uid = cred->euid = p->creds.uid;
cred->gid = cred->egid = p->creds.gid;
if (p->cred)
put_cred(p->cred);
p->cred = cred;
}
break;
default:
goto error;
}
}
if (p->fp && !p->fp->count)
{
kfree(p->fp);
p->fp = NULL;
}
return 0;
error:
scm_destroy(p);
return err;
}
EXPORT_SYMBOL(__scm_send);
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
struct cmsghdr __user *cm
= (__force struct cmsghdr __user *)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
if (MSG_CMSG_COMPAT & msg->msg_flags)
return put_cmsg_compat(msg, level, type, len, data);
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
msg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
if (msg->msg_controllen < cmlen) {
msg->msg_flags |= MSG_CTRUNC;
cmlen = msg->msg_controllen;
}
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
err = -EFAULT;
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
goto out;
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
goto out;
cmlen = CMSG_SPACE(len);
if (msg->msg_controllen < cmlen)
cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
err = 0;
out:
return err;
}
EXPORT_SYMBOL(put_cmsg);
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
struct cmsghdr __user *cm
= (__force struct cmsghdr __user*)msg->msg_control;
int fdmax = 0;
int fdnum = scm->fp->count;
struct file **fp = scm->fp->fp;
int __user *cmfptr;
int err = 0, i;
if (MSG_CMSG_COMPAT & msg->msg_flags) {
scm_detach_fds_compat(msg, scm);
return;
}
if (msg->msg_controllen > sizeof(struct cmsghdr))
fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
/ sizeof(int));
if (fdnum < fdmax)
fdmax = fdnum;
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
i++, cmfptr++)
{
int new_fd;
err = security_file_receive(fp[i]);
if (err)
break;
err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
? O_CLOEXEC : 0);
if (err < 0)
break;
new_fd = err;
err = put_user(new_fd, cmfptr);
if (err) {
put_unused_fd(new_fd);
break;
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
fd_install(new_fd, fp[i]);
}
if (i > 0)
{
int cmlen = CMSG_LEN(i*sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
if (!err)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
}
if (i < fdnum || (fdnum && fdmax <= 0))
msg->msg_flags |= MSG_CTRUNC;
/*
* All of the files that fit in the message have had their
* usage counts incremented, so we just free the list.
*/
__scm_destroy(scm);
}
EXPORT_SYMBOL(scm_detach_fds);
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
struct scm_fp_list *new_fpl;
int i;
if (!fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
GFP_KERNEL);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
new_fpl->max = new_fpl->count;
}
return new_fpl;
}
EXPORT_SYMBOL(scm_fp_dup);