mirror of
https://git.kernel.org/pub/scm/network/iproute2/iproute2.git
synced 2024-11-16 14:35:34 +08:00
32e93fb7f6
This larger work addresses one of the bigger remaining issues on tc's eBPF frontend, that is, to allow for persistent file descriptors. Whenever tc parses the ELF object, extracts and loads maps into the kernel, these file descriptors will be out of reach after the tc instance exits. Meaning, for simple (unnested) programs which contain one or multiple maps, the kernel holds a reference, and they will live on inside the kernel until the program holding them is unloaded, but they will be out of reach for user space, even worse with (also multiple nested) tail calls. For this issue, we introduced the concept of an agent that can receive the set of file descriptors from the tc instance creating them, in order to be able to further inspect/update map data for a specific use case. However, while that is more tied towards specific applications, it still doesn't easily allow for sharing maps across multiple tc instances and would require a daemon to be running in the background. F.e. when a map should be shared by two eBPF programs, one attached to ingress, one to egress, this currently doesn't work with the tc frontend. This work solves exactly that, i.e. if requested, maps can now be _arbitrarily_ shared between object files (PIN_GLOBAL_NS) or within a single object (but various program sections, PIN_OBJECT_NS) without "losing" the file descriptor set. To make that happen, we use eBPF object pinning introduced in kernel commit b2197755b263 ("bpf: add support for persistent maps/progs") for exactly this purpose. 
The shipped examples/bpf/bpf_shared.c code from this patch can be easily applied, for instance, as: - classifier-classifier shared: tc filter add dev foo parent 1: bpf obj shared.o sec egress tc filter add dev foo parent ffff: bpf obj shared.o sec ingress - classifier-action shared (here: late binding to a dummy classifier): tc actions add action bpf obj shared.o sec egress pass index 42 tc filter add dev foo parent ffff: bpf obj shared.o sec ingress tc filter add dev foo parent 1: bpf bytecode '1,6 0 0 4294967295,' \ action bpf index 42 The toy example increments a shared counter on egress and dumps its value on ingress (if no sharing (PIN_NONE) would have been chosen, map value is 0, of course, due to the two map instances being created): [...] <idle>-0 [002] ..s. 38264.788234: : map val: 4 <idle>-0 [002] ..s. 38264.788919: : map val: 4 <idle>-0 [002] ..s. 38264.789599: : map val: 5 [...] ... thus if both sections reference the pinned map(s) in question, tc will take care of fetching the appropriate file descriptor. The patch has been tested extensively on both, classifier and action sides. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
204 lines
5.5 KiB
C
204 lines
5.5 KiB
C
/*
|
|
* f_bpf.c BPF-based Classifier
|
|
*
|
|
* This program is free software; you can distribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
* Authors: Daniel Borkmann <dborkman@redhat.com>
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include "utils.h"
|
|
#include "tc_util.h"
|
|
#include "tc_bpf.h"
|
|
|
|
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
|
|
|
|
static const int nla_tbl[BPF_NLA_MAX] = {
|
|
[BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
|
|
[BPF_NLA_OPS] = TCA_BPF_OPS,
|
|
[BPF_NLA_FD] = TCA_BPF_FD,
|
|
[BPF_NLA_NAME] = TCA_BPF_NAME,
|
|
};
|
|
|
|
static void explain(void)
|
|
{
|
|
fprintf(stderr, "Usage: ... bpf ...\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "BPF use case:\n");
|
|
fprintf(stderr, " bytecode BPF_BYTECODE\n");
|
|
fprintf(stderr, " bytecode-file FILE\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "eBPF use case:\n");
|
|
fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]");
|
|
fprintf(stderr, " [ verbose ] [ direct-action ]\n");
|
|
fprintf(stderr, " object-pinned FILE [ direct-action ]\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "Common remaining options:\n");
|
|
fprintf(stderr, " [ action ACTION_SPEC ]\n");
|
|
fprintf(stderr, " [ classid CLASSID ]\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
|
|
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
|
|
fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
|
|
fprintf(stderr, "pinned eBPF program.\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
|
|
fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
|
|
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
|
|
fprintf(stderr, "\n");
|
|
fprintf(stderr, "ACTION_SPEC := ... look at individual actions\n");
|
|
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
|
|
}
|
|
|
|
static int bpf_parse_opt(struct filter_util *qu, char *handle,
|
|
int argc, char **argv, struct nlmsghdr *n)
|
|
{
|
|
const char *bpf_obj = NULL, *bpf_uds_name = NULL;
|
|
struct tcmsg *t = NLMSG_DATA(n);
|
|
unsigned int bpf_flags = 0;
|
|
bool seen_run = false;
|
|
struct rtattr *tail;
|
|
int ret = 0;
|
|
|
|
if (argc == 0)
|
|
return 0;
|
|
|
|
if (handle) {
|
|
if (get_u32(&t->tcm_handle, handle, 0)) {
|
|
fprintf(stderr, "Illegal \"handle\"\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len));
|
|
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
|
|
|
|
while (argc > 0) {
|
|
if (matches(*argv, "run") == 0) {
|
|
NEXT_ARG();
|
|
opt_bpf:
|
|
seen_run = true;
|
|
if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
|
|
&bpf_obj, &bpf_uds_name, n)) {
|
|
fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
|
|
return -1;
|
|
}
|
|
} else if (matches(*argv, "classid") == 0 ||
|
|
matches(*argv, "flowid") == 0) {
|
|
unsigned int handle;
|
|
|
|
NEXT_ARG();
|
|
if (get_tc_classid(&handle, *argv)) {
|
|
fprintf(stderr, "Illegal \"classid\"\n");
|
|
return -1;
|
|
}
|
|
addattr32(n, MAX_MSG, TCA_BPF_CLASSID, handle);
|
|
} else if (matches(*argv, "direct-action") == 0 ||
|
|
matches(*argv, "da") == 0) {
|
|
bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
|
|
} else if (matches(*argv, "action") == 0) {
|
|
NEXT_ARG();
|
|
if (parse_action(&argc, &argv, TCA_BPF_ACT, n)) {
|
|
fprintf(stderr, "Illegal \"action\"\n");
|
|
return -1;
|
|
}
|
|
continue;
|
|
} else if (matches(*argv, "police") == 0) {
|
|
NEXT_ARG();
|
|
if (parse_police(&argc, &argv, TCA_BPF_POLICE, n)) {
|
|
fprintf(stderr, "Illegal \"police\"\n");
|
|
return -1;
|
|
}
|
|
continue;
|
|
} else if (matches(*argv, "help") == 0) {
|
|
explain();
|
|
return -1;
|
|
} else {
|
|
if (!seen_run)
|
|
goto opt_bpf;
|
|
|
|
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
explain();
|
|
return -1;
|
|
}
|
|
|
|
NEXT_ARG_FWD();
|
|
}
|
|
|
|
if (bpf_obj && bpf_flags)
|
|
addattr32(n, MAX_MSG, TCA_BPF_FLAGS, bpf_flags);
|
|
|
|
tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
|
|
|
|
if (bpf_uds_name)
|
|
ret = bpf_send_map_fds(bpf_uds_name, bpf_obj);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Print the options of a bpf classifier to @f.
 *
 * @qu:     unused here.
 * @f:      output stream.
 * @opt:    nested attribute holding the TCA_BPF_* attributes; when NULL
 *          nothing is printed.
 * @handle: filter handle, printed in hex when non-zero.
 *
 * Always returns 0.
 */
static int bpf_print_opt(struct filter_util *qu, FILE *f,
			 struct rtattr *opt, __u32 handle)
{
	struct rtattr *attrs[TCA_BPF_MAX + 1];

	if (!opt)
		return 0;

	parse_rtattr_nested(attrs, TCA_BPF_MAX, opt);

	if (handle != 0)
		fprintf(f, "handle 0x%x ", handle);

	if (attrs[TCA_BPF_CLASSID]) {
		SPRINT_BUF(buf);
		__u32 classid = rta_getattr_u32(attrs[TCA_BPF_CLASSID]);

		fprintf(f, "flowid %s ", sprint_tc_classid(classid, buf));
	}

	/* A name takes precedence; the fd is only shown when no name exists. */
	if (attrs[TCA_BPF_NAME])
		fprintf(f, "%s ", rta_getattr_str(attrs[TCA_BPF_NAME]));
	else if (attrs[TCA_BPF_FD])
		fprintf(f, "pfd %u ", rta_getattr_u32(attrs[TCA_BPF_FD]));

	if (attrs[TCA_BPF_FLAGS] &&
	    (rta_getattr_u32(attrs[TCA_BPF_FLAGS]) & TCA_BPF_FLAG_ACT_DIRECT))
		fprintf(f, "direct-action ");

	/* The ops are dumped only when both the ops and their count exist. */
	if (attrs[TCA_BPF_OPS] && attrs[TCA_BPF_OPS_LEN]) {
		bpf_print_ops(f, attrs[TCA_BPF_OPS],
			      rta_getattr_u16(attrs[TCA_BPF_OPS_LEN]));
		fprintf(f, "\n");
	}

	if (attrs[TCA_BPF_POLICE]) {
		fprintf(f, "\n");
		tc_print_police(f, attrs[TCA_BPF_POLICE]);
	}

	if (attrs[TCA_BPF_ACT])
		tc_print_action(f, attrs[TCA_BPF_ACT]);

	return 0;
}
|
|
|
|
struct filter_util bpf_filter_util = {
|
|
.id = "bpf",
|
|
.parse_fopt = bpf_parse_opt,
|
|
.print_fopt = bpf_print_opt,
|
|
};
|