mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-26 06:04:14 +08:00
2bbc078f81
Daniel Borkmann says: ==================== pull-request: bpf-next 2019-12-27 The following pull-request contains BPF updates for your *net-next* tree. We've added 127 non-merge commits during the last 17 day(s) which contain a total of 110 files changed, 6901 insertions(+), 2721 deletions(-). There are three merge conflicts. Conflicts and resolution looks as follows: 1) Merge conflict in net/bpf/test_run.c: There was a tree-wide cleanup c593642c8b
("treewide: Use sizeof_field() macro") which gets in the way with b590cb5f80
("bpf: Switch to offsetofend in BPF_PROG_TEST_RUN"): <<<<<<< HEAD if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + sizeof_field(struct __sk_buff, priority), ======= if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), >>>>>>> 7c8dce4b16
There are a few occasions that look similar to this. Always take the chunk with offsetofend(). Note that there is one where the fields differ in here: <<<<<<< HEAD if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) + sizeof_field(struct __sk_buff, tstamp), ======= if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), >>>>>>>7c8dce4b16
Just take the one with offsetofend() /and/ gso_segs. Latter is correct due to 850a88cc40
("bpf: Expose __sk_buff wire_len/gso_segs to BPF_PROG_TEST_RUN"). 2) Merge conflict in arch/riscv/net/bpf_jit_comp.c: (I'm keeping Bjorn in Cc here for a double-check in case I got it wrong.) <<<<<<< HEAD if (is_13b_check(off, insn)) return -1; emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx); ======= emit_branch(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx); >>>>>>>7c8dce4b16
Result should look like: emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); 3) Merge conflict in arch/riscv/include/asm/pgtable.h: <<<<<<< HEAD ======= #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ #define VMEMMAP_SHIFT \ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) #define VMEMMAP_END (VMALLOC_START - 1) #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) #define vmemmap ((struct page *)VMEMMAP_START) >>>>>>>7c8dce4b16
Only take the BPF_* defines from there and move them higher up in the same file. Remove the rest from the chunk. The VMALLOC_* etc defines got moved via 01f52e16b8
("riscv: define vmemmap before pfn_to_page calls"). Result: [...] #define __S101 PAGE_READ_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ #define VMEMMAP_SHIFT \ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) #define VMEMMAP_END (VMALLOC_START - 1) #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) [...] Let me know if there are any other issues. Anyway, the main changes are: 1) Extend bpftool to produce a struct (aka "skeleton") tailored and specific to a provided BPF object file. This provides an alternative, simplified API compared to standard libbpf interaction. Also, add libbpf extern variable resolution for .kconfig section to import Kconfig data, from Andrii Nakryiko. 2) Add BPF dispatcher for XDP which is a mechanism to avoid indirect calls by generating a branch funnel as discussed back in bpfconf'19 at LSF/MM. Also, add various BPF riscv JIT improvements, from Björn Töpel. 3) Extend bpftool to allow matching BPF programs and maps by name, from Paul Chaignon. 4) Support for replacing cgroup BPF programs attached with BPF_F_ALLOW_MULTI flag for allowing updates without service interruption, from Andrey Ignatov. 5) Cleanup and simplification of ring access functions for AF_XDP with a bonus of 0-5% performance improvement, from Magnus Karlsson. 6) Enable BPF JITs for x86-64 and arm64 by default. Also, final version of audit support for BPF, from Daniel Borkmann and latter with Jiri Olsa. 
7) Move and extend test_select_reuseport into BPF program tests under BPF selftests, from Jakub Sitnicki. 8) Various BPF sample improvements for xdpsock for customizing parameters to set up and benchmark AF_XDP, from Jay Jayatheerthan. 9) Improve libbpf to provide a ulimit hint on permission denied errors. Also change XDP sample programs to attach in driver mode by default, from Toke Høiland-Jørgensen. 10) Extend BPF test infrastructure to allow changing skb mark from tc BPF programs, from Nikita V. Shirokov. 11) Optimize prologue code sequence in BPF arm32 JIT, from Russell King. 12) Fix xdp_redirect_cpu BPF sample to manually attach to tracepoints after libbpf conversion, from Jesper Dangaard Brouer. 13) Minor misc improvements from various others. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
316 lines
7.5 KiB
C
316 lines
7.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Copyright (c) 2019 Facebook */
|
|
#include <linux/hash.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/filter.h>
|
|
#include <linux/ftrace.h>
|
|
|
|
/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
|
|
#define TRAMPOLINE_HASH_BITS 10
|
|
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
|
|
|
|
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
|
|
|
|
/* serializes access to trampoline_table */
|
|
static DEFINE_MUTEX(trampoline_mutex);
|
|
|
|
void *bpf_jit_alloc_exec_page(void)
|
|
{
|
|
void *image;
|
|
|
|
image = bpf_jit_alloc_exec(PAGE_SIZE);
|
|
if (!image)
|
|
return NULL;
|
|
|
|
set_vm_flush_reset_perms(image);
|
|
/* Keep image as writeable. The alternative is to keep flipping ro/rw
|
|
* everytime new program is attached or detached.
|
|
*/
|
|
set_memory_x((long)image, 1);
|
|
return image;
|
|
}
|
|
|
|
/* Look up the trampoline stored under @key, creating it when missing.
 *
 * An existing trampoline gets its refcount incremented; a freshly created
 * one starts with a refcount of 1 and an image from
 * bpf_jit_alloc_exec_page(). The whole operation runs under
 * trampoline_mutex.
 *
 * Returns the trampoline, or NULL if either allocation fails.
 */
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct hlist_head *bucket;
	struct bpf_trampoline *tr;
	void *page;
	int kind;

	mutex_lock(&trampoline_mutex);
	bucket = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, bucket, hlist) {
		if (tr->key != key)
			continue;
		/* hit: hand out another reference */
		refcount_inc(&tr->refcnt);
		goto unlock;
	}

	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto unlock;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	page = bpf_jit_alloc_exec_page();
	if (!page) {
		kfree(tr);
		tr = NULL;
		goto unlock;
	}

	/* Fill in the new trampoline, then add it to its bucket. */
	tr->key = key;
	tr->image = page;
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (kind = 0; kind < BPF_TRAMP_MAX; kind++)
		INIT_HLIST_HEAD(&tr->progs_hlist[kind]);
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, bucket);
unlock:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
|
|
|
|
static int is_ftrace_location(void *ip)
|
|
{
|
|
long addr;
|
|
|
|
addr = ftrace_location((long)ip);
|
|
if (!addr)
|
|
return 0;
|
|
if (WARN_ON_ONCE(addr != (long)ip))
|
|
return -EFAULT;
|
|
return 1;
|
|
}
|
|
|
|
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
|
|
{
|
|
void *ip = tr->func.addr;
|
|
int ret;
|
|
|
|
if (tr->func.ftrace_managed)
|
|
ret = unregister_ftrace_direct((long)ip, (long)old_addr);
|
|
else
|
|
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
|
|
return ret;
|
|
}
|
|
|
|
static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
|
|
{
|
|
void *ip = tr->func.addr;
|
|
int ret;
|
|
|
|
if (tr->func.ftrace_managed)
|
|
ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
|
|
else
|
|
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
|
|
return ret;
|
|
}
|
|
|
|
/* first time registering */
|
|
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
|
|
{
|
|
void *ip = tr->func.addr;
|
|
int ret;
|
|
|
|
ret = is_ftrace_location(ip);
|
|
if (ret < 0)
|
|
return ret;
|
|
tr->func.ftrace_managed = ret;
|
|
|
|
if (tr->func.ftrace_managed)
|
|
ret = register_ftrace_direct((long)ip, (long)new_addr);
|
|
else
|
|
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
|
|
return ret;
|
|
}
|
|
|
|
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
|
|
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
|
|
*/
|
|
#define BPF_MAX_TRAMP_PROGS 40
|
|
|
|
static int bpf_trampoline_update(struct bpf_trampoline *tr)
|
|
{
|
|
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
|
|
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
|
|
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
|
|
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
|
|
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
|
|
struct bpf_prog **progs, **fentry, **fexit;
|
|
u32 flags = BPF_TRAMP_F_RESTORE_REGS;
|
|
struct bpf_prog_aux *aux;
|
|
int err;
|
|
|
|
if (fentry_cnt + fexit_cnt == 0) {
|
|
err = unregister_fentry(tr, old_image);
|
|
tr->selector = 0;
|
|
goto out;
|
|
}
|
|
|
|
/* populate fentry progs */
|
|
fentry = progs = progs_to_run;
|
|
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
|
|
*progs++ = aux->prog;
|
|
|
|
/* populate fexit progs */
|
|
fexit = progs;
|
|
hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
|
|
*progs++ = aux->prog;
|
|
|
|
if (fexit_cnt)
|
|
flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
|
|
|
|
err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
|
|
fentry, fentry_cnt,
|
|
fexit, fexit_cnt,
|
|
tr->func.addr);
|
|
if (err)
|
|
goto out;
|
|
|
|
if (tr->selector)
|
|
/* progs already running at this address */
|
|
err = modify_fentry(tr, old_image, new_image);
|
|
else
|
|
/* first time registering */
|
|
err = register_fentry(tr, new_image);
|
|
if (err)
|
|
goto out;
|
|
tr->selector++;
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
|
|
{
|
|
switch (t) {
|
|
case BPF_TRACE_FENTRY:
|
|
return BPF_TRAMP_FENTRY;
|
|
default:
|
|
return BPF_TRAMP_FEXIT;
|
|
}
|
|
}
|
|
|
|
int bpf_trampoline_link_prog(struct bpf_prog *prog)
|
|
{
|
|
enum bpf_tramp_prog_type kind;
|
|
struct bpf_trampoline *tr;
|
|
int err = 0;
|
|
|
|
tr = prog->aux->trampoline;
|
|
kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
|
|
mutex_lock(&tr->mutex);
|
|
if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
|
|
>= BPF_MAX_TRAMP_PROGS) {
|
|
err = -E2BIG;
|
|
goto out;
|
|
}
|
|
if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
|
|
/* prog already linked */
|
|
err = -EBUSY;
|
|
goto out;
|
|
}
|
|
hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
|
|
tr->progs_cnt[kind]++;
|
|
err = bpf_trampoline_update(prog->aux->trampoline);
|
|
if (err) {
|
|
hlist_del(&prog->aux->tramp_hlist);
|
|
tr->progs_cnt[kind]--;
|
|
}
|
|
out:
|
|
mutex_unlock(&tr->mutex);
|
|
return err;
|
|
}
|
|
|
|
/* bpf_trampoline_unlink_prog() should never fail. */
|
|
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
|
|
{
|
|
enum bpf_tramp_prog_type kind;
|
|
struct bpf_trampoline *tr;
|
|
int err;
|
|
|
|
tr = prog->aux->trampoline;
|
|
kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
|
|
mutex_lock(&tr->mutex);
|
|
hlist_del(&prog->aux->tramp_hlist);
|
|
tr->progs_cnt[kind]--;
|
|
err = bpf_trampoline_update(prog->aux->trampoline);
|
|
mutex_unlock(&tr->mutex);
|
|
return err;
|
|
}
|
|
|
|
void bpf_trampoline_put(struct bpf_trampoline *tr)
|
|
{
|
|
if (!tr)
|
|
return;
|
|
mutex_lock(&trampoline_mutex);
|
|
if (!refcount_dec_and_test(&tr->refcnt))
|
|
goto out;
|
|
WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
|
|
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
|
|
goto out;
|
|
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
|
|
goto out;
|
|
bpf_jit_free_exec(tr->image);
|
|
hlist_del(&tr->hlist);
|
|
kfree(tr);
|
|
out:
|
|
mutex_unlock(&trampoline_mutex);
|
|
}
|
|
|
|
/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
|
|
* are needed for trampoline. The macro is split into
|
|
* call _bpf_prog_enter
|
|
* call prog->bpf_func
|
|
* call __bpf_prog_exit
|
|
*/
|
|
u64 notrace __bpf_prog_enter(void)
|
|
{
|
|
u64 start = 0;
|
|
|
|
rcu_read_lock();
|
|
preempt_disable();
|
|
if (static_branch_unlikely(&bpf_stats_enabled_key))
|
|
start = sched_clock();
|
|
return start;
|
|
}
|
|
|
|
/* Counterpart of __bpf_prog_enter(): account the run in @prog's per-cpu
 * stats when applicable, then re-enable preemption and drop the RCU read
 * lock.
 *
 * @start is the value returned by __bpf_prog_enter().
 */
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
	/* static_key could be enabled in __bpf_prog_enter
	 * and disabled in __bpf_prog_exit.
	 * And vice versa.
	 * Hence check that 'start' is not zero.
	 */
	if (static_branch_unlikely(&bpf_stats_enabled_key) && start) {
		struct bpf_prog_stats *st = this_cpu_ptr(prog->aux->stats);

		u64_stats_update_begin(&st->syncp);
		st->cnt++;
		st->nsecs += sched_clock() - start;
		u64_stats_update_end(&st->syncp);
	}
	preempt_enable();
	rcu_read_unlock();
}
|
|
|
|
int __weak
|
|
arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
|
|
struct bpf_prog **fentry_progs, int fentry_cnt,
|
|
struct bpf_prog **fexit_progs, int fexit_cnt,
|
|
void *orig_call)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
|
|
/* Initialise every bucket of trampoline_table as an empty list. Run as a
 * late initcall; always returns 0.
 */
static int __init init_trampolines(void)
{
	struct hlist_head *bucket = trampoline_table;
	struct hlist_head *end = trampoline_table + TRAMPOLINE_TABLE_SIZE;

	while (bucket < end) {
		INIT_HLIST_HEAD(bucket);
		bucket++;
	}
	return 0;
}
late_initcall(init_trampolines);
|