mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-15 15:04:27 +08:00
b741f16303
This commit introduced per-cpu cgroup local storage. Per-cpu cgroup local storage is very similar to simple cgroup storage (let's call it shared), except all the data is per-cpu. The main goal of per-cpu variant is to implement super fast counters (e.g. packet counters), which don't require neither lookups, neither atomic operations. >From userspace's point of view, accessing a per-cpu cgroup storage is similar to other per-cpu map types (e.g. per-cpu hashmaps and arrays). Writing to a per-cpu cgroup storage is not atomic, but is performed by copying longs, so some minimal atomicity is here, exactly as with other per-cpu maps. Signed-off-by: Roman Gushchin <guro@fb.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Alexei Starovoitov <ast@kernel.org> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
230 lines
5.7 KiB
C
230 lines
5.7 KiB
C
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*/
|
|
#include <linux/bpf.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/random.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/topology.h>
|
|
#include <linux/ktime.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/uidgid.h>
|
|
#include <linux/filter.h>
|
|
|
|
/* If kernel subsystem is allowing eBPF programs to call this function,
|
|
* inside its own verifier_ops->get_func_proto() callback it should return
|
|
* bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
|
|
*
|
|
* Different map implementations will rely on rcu in map methods
|
|
* lookup/update/delete, therefore eBPF programs must run under rcu lock
|
|
* if program is allowed to access maps, so check rcu_read_lock_held in
|
|
* all three functions.
|
|
*/
|
|
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
|
|
{
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
return (unsigned long) map->ops->map_lookup_elem(map, key);
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
|
|
.func = bpf_map_lookup_elem,
|
|
.gpl_only = false,
|
|
.pkt_access = true,
|
|
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
};
|
|
|
|
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
|
|
void *, value, u64, flags)
|
|
{
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
return map->ops->map_update_elem(map, key, value, flags);
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_map_update_elem_proto = {
|
|
.func = bpf_map_update_elem,
|
|
.gpl_only = false,
|
|
.pkt_access = true,
|
|
.ret_type = RET_INTEGER,
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
.arg3_type = ARG_PTR_TO_MAP_VALUE,
|
|
.arg4_type = ARG_ANYTHING,
|
|
};
|
|
|
|
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
|
|
{
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
return map->ops->map_delete_elem(map, key);
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_map_delete_elem_proto = {
|
|
.func = bpf_map_delete_elem,
|
|
.gpl_only = false,
|
|
.pkt_access = true,
|
|
.ret_type = RET_INTEGER,
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
|
};
|
|
|
|
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
|
|
.func = bpf_user_rnd_u32,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_0(bpf_get_smp_processor_id)
|
|
{
|
|
return smp_processor_id();
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
|
|
.func = bpf_get_smp_processor_id,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_0(bpf_get_numa_node_id)
|
|
{
|
|
return numa_node_id();
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_numa_node_id_proto = {
|
|
.func = bpf_get_numa_node_id,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_0(bpf_ktime_get_ns)
|
|
{
|
|
/* NMI safe access to clock monotonic */
|
|
return ktime_get_mono_fast_ns();
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_ktime_get_ns_proto = {
|
|
.func = bpf_ktime_get_ns,
|
|
.gpl_only = true,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_0(bpf_get_current_pid_tgid)
|
|
{
|
|
struct task_struct *task = current;
|
|
|
|
if (unlikely(!task))
|
|
return -EINVAL;
|
|
|
|
return (u64) task->tgid << 32 | task->pid;
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
|
|
.func = bpf_get_current_pid_tgid,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_0(bpf_get_current_uid_gid)
|
|
{
|
|
struct task_struct *task = current;
|
|
kuid_t uid;
|
|
kgid_t gid;
|
|
|
|
if (unlikely(!task))
|
|
return -EINVAL;
|
|
|
|
current_uid_gid(&uid, &gid);
|
|
return (u64) from_kgid(&init_user_ns, gid) << 32 |
|
|
from_kuid(&init_user_ns, uid);
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
|
|
.func = bpf_get_current_uid_gid,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
|
|
{
|
|
struct task_struct *task = current;
|
|
|
|
if (unlikely(!task))
|
|
goto err_clear;
|
|
|
|
strncpy(buf, task->comm, size);
|
|
|
|
/* Verifier guarantees that size > 0. For task->comm exceeding
|
|
* size, guarantee that buf is %NUL-terminated. Unconditionally
|
|
* done here to save the size test.
|
|
*/
|
|
buf[size - 1] = 0;
|
|
return 0;
|
|
err_clear:
|
|
memset(buf, 0, size);
|
|
return -EINVAL;
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_current_comm_proto = {
|
|
.func = bpf_get_current_comm,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
|
.arg2_type = ARG_CONST_SIZE,
|
|
};
|
|
|
|
#ifdef CONFIG_CGROUPS
|
|
BPF_CALL_0(bpf_get_current_cgroup_id)
|
|
{
|
|
struct cgroup *cgrp = task_dfl_cgroup(current);
|
|
|
|
return cgrp->kn->id.id;
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
|
|
.func = bpf_get_current_cgroup_id,
|
|
.gpl_only = false,
|
|
.ret_type = RET_INTEGER,
|
|
};
|
|
|
|
#ifdef CONFIG_CGROUP_BPF
|
|
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
|
|
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
|
|
|
|
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
|
|
{
|
|
/* flags argument is not used now,
|
|
* but provides an ability to extend the API.
|
|
* verifier checks that its value is correct.
|
|
*/
|
|
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
|
|
struct bpf_cgroup_storage *storage;
|
|
void *ptr;
|
|
|
|
storage = this_cpu_read(bpf_cgroup_storage[stype]);
|
|
|
|
if (stype == BPF_CGROUP_STORAGE_SHARED)
|
|
ptr = &READ_ONCE(storage->buf)->data[0];
|
|
else
|
|
ptr = this_cpu_ptr(storage->percpu_buf);
|
|
|
|
return (unsigned long)ptr;
|
|
}
|
|
|
|
const struct bpf_func_proto bpf_get_local_storage_proto = {
|
|
.func = bpf_get_local_storage,
|
|
.gpl_only = false,
|
|
.ret_type = RET_PTR_TO_MAP_VALUE,
|
|
.arg1_type = ARG_CONST_MAP_PTR,
|
|
.arg2_type = ARG_ANYTHING,
|
|
};
|
|
#endif
|
|
#endif
|