Add sample for adding simple drop program to link

Add a sample program that only drops packets at the BPF_PROG_TYPE_XDP_RX
hook of a link. With the drop-only program, observed single core rate is
~20Mpps.

Other tests were run, for instance without the dropcnt increment or
without reading from the packet header, the packet rate was mostly
unchanged.

$ perf record -a samples/bpf/xdp1 $(</sys/class/net/eth0/ifindex)
proto 17:   20403027 drops/s

./pktgen_sample03_burst_single_flow.sh -i $DEV -d $IP -m $MAC -t 4
Running... ctrl^C to stop
Device: eth4@0
Result: OK: 11791017(c11788327+d2689) usec, 59622913 (60byte,0frags)
  5056638pps 2427Mb/sec (2427186240bps) errors: 0
Device: eth4@1
Result: OK: 11791012(c11787906+d3106) usec, 60526944 (60byte,0frags)
  5133311pps 2463Mb/sec (2463989280bps) errors: 0
Device: eth4@2
Result: OK: 11791019(c11788249+d2769) usec, 59868091 (60byte,0frags)
  5077431pps 2437Mb/sec (2437166880bps) errors: 0
Device: eth4@3
Result: OK: 11795039(c11792403+d2636) usec, 59483181 (60byte,0frags)
  5043067pps 2420Mb/sec (2420672160bps) errors: 0

perf report --no-children:
 26.05%  ksoftirqd/0  [mlx4_en]         [k] mlx4_en_process_rx_cq
 17.84%  ksoftirqd/0  [mlx4_en]         [k] mlx4_en_alloc_frags
  5.52%  ksoftirqd/0  [mlx4_en]         [k] mlx4_en_free_frag
  4.90%  swapper      [kernel.vmlinux]  [k] poll_idle
  4.14%  ksoftirqd/0  [kernel.vmlinux]  [k] get_page_from_freelist
  2.78%  ksoftirqd/0  [kernel.vmlinux]  [k] __free_pages_ok
  2.57%  ksoftirqd/0  [kernel.vmlinux]  [k] bpf_map_lookup_elem
  2.51%  swapper      [mlx4_en]         [k] mlx4_en_process_rx_cq
  1.94%  ksoftirqd/0  [kernel.vmlinux]  [k] percpu_array_map_lookup_elem
  1.45%  swapper      [mlx4_en]         [k] mlx4_en_alloc_frags
  1.35%  ksoftirqd/0  [kernel.vmlinux]  [k] free_one_page
  1.33%  swapper      [kernel.vmlinux]  [k] intel_idle
  1.04%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c5c5
  0.96%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c58d
  0.93%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c6ee
  0.92%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c6b9
  0.89%  ksoftirqd/0  [kernel.vmlinux]  [k] __alloc_pages_nodemask
  0.83%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c686
  0.83%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c5d5
  0.78%  ksoftirqd/0  [mlx4_en]         [k] mlx4_alloc_pages.isra.23
  0.77%  ksoftirqd/0  [mlx4_en]         [k] 0x000000000001c5b4
  0.77%  ksoftirqd/0  [kernel.vmlinux]  [k] net_rx_action

machine specs:
 receiver - Intel E5-1630 v3 @ 3.70GHz
 sender - Intel E5645 @ 2.40GHz
 Mellanox ConnectX-3 @40G

Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Brenden Blanco 2016-07-19 12:16:51 -07:00 committed by David S. Miller
parent 47a38e1550
commit 86af8b4191
4 changed files with 286 additions and 0 deletions

View File

@ -21,6 +21,7 @@ hostprogs-y += spintest
hostprogs-y += map_perf_test hostprogs-y += map_perf_test
hostprogs-y += test_overhead hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin hostprogs-y += test_cgrp2_array_pin
hostprogs-y += xdp1
test_verifier-objs := test_verifier.o libbpf.o test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o test_maps-objs := test_maps.o libbpf.o
@ -42,6 +43,7 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# Tell kbuild to always build the programs # Tell kbuild to always build the programs
always := $(hostprogs-y) always := $(hostprogs-y)
@ -64,6 +66,7 @@ always += test_overhead_tp_kern.o
always += test_overhead_kprobe_kern.o always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o always += test_cgrp2_tc_kern.o
always += xdp1_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(objtree)/usr/include
@ -84,6 +87,7 @@ HOSTLOADLIBES_offwaketime += -lelf
HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_spintest += -lelf
HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_map_perf_test += -lelf -lrt
HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt
HOSTLOADLIBES_xdp1 += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang

View File

@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
bool is_xdp = strncmp(event, "xdp", 3) == 0;
enum bpf_prog_type prog_type; enum bpf_prog_type prog_type;
char buf[256]; char buf[256];
int fd, efd, err, id; int fd, efd, err, id;
@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE; prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) { } else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT; prog_type = BPF_PROG_TYPE_TRACEPOINT;
} else if (is_xdp) {
prog_type = BPF_PROG_TYPE_XDP;
} else { } else {
printf("Unknown event '%s'\n", event); printf("Unknown event '%s'\n", event);
return -1; return -1;
@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_fd[prog_cnt++] = fd; prog_fd[prog_cnt++] = fd;
if (is_xdp)
return 0;
if (is_socket) { if (is_socket) {
event += 6; event += 6;
if (*event != '/') if (*event != '/')
@ -319,6 +325,7 @@ int load_bpf_file(char *path)
if (memcmp(shname_prog, "kprobe/", 7) == 0 || if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "kretprobe/", 10) == 0 ||
memcmp(shname_prog, "tracepoint/", 11) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 ||
memcmp(shname_prog, "xdp", 3) == 0 ||
memcmp(shname_prog, "socket", 6) == 0) memcmp(shname_prog, "socket", 6) == 0)
load_and_attach(shname_prog, insns, data_prog->d_size); load_and_attach(shname_prog, insns, data_prog->d_size);
} }
@ -336,6 +343,7 @@ int load_bpf_file(char *path)
if (memcmp(shname, "kprobe/", 7) == 0 || if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 || memcmp(shname, "tracepoint/", 11) == 0 ||
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "socket", 6) == 0) memcmp(shname, "socket", 6) == 0)
load_and_attach(shname, data->d_buf, data->d_size); load_and_attach(shname, data->d_buf, data->d_size);
} }

93
samples/bpf/xdp1_kern.c Normal file
View File

@ -0,0 +1,93 @@
/* Copyright (c) 2016 PLUMgrid
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "bpf_helpers.h"
struct bpf_map_def SEC("maps") dropcnt = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = 256,
};
static int parse_ipv4(void *data, u64 nh_off, void *data_end)
{
struct iphdr *iph = data + nh_off;
if (iph + 1 > data_end)
return 0;
return iph->protocol;
}
static int parse_ipv6(void *data, u64 nh_off, void *data_end)
{
struct ipv6hdr *ip6h = data + nh_off;
if (ip6h + 1 > data_end)
return 0;
return ip6h->nexthdr;
}
SEC("xdp1")
int xdp_prog1(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
int rc = XDP_DROP;
long *value;
u16 h_proto;
u64 nh_off;
u32 index;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return rc;
h_proto = eth->h_proto;
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
vhdr = data + nh_off;
nh_off += sizeof(struct vlan_hdr);
if (data + nh_off > data_end)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
vhdr = data + nh_off;
nh_off += sizeof(struct vlan_hdr);
if (data + nh_off > data_end)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
if (h_proto == htons(ETH_P_IP))
index = parse_ipv4(data, nh_off, data_end);
else if (h_proto == htons(ETH_P_IPV6))
index = parse_ipv6(data, nh_off, data_end);
else
index = 0;
value = bpf_map_lookup_elem(&dropcnt, &index);
if (value)
*value += 1;
return rc;
}
char _license[] SEC("license") = "GPL";

181
samples/bpf/xdp1_user.c Normal file
View File

@ -0,0 +1,181 @@
/* Copyright (c) 2016 PLUMgrid
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include "bpf_load.h"
#include "libbpf.h"
static int set_link_xdp_fd(int ifindex, int fd)
{
struct sockaddr_nl sa;
int sock, seq = 0, len, ret = -1;
char buf[4096];
struct nlattr *nla, *nla_xdp;
struct {
struct nlmsghdr nh;
struct ifinfomsg ifinfo;
char attrbuf[64];
} req;
struct nlmsghdr *nh;
struct nlmsgerr *err;
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
printf("open netlink socket: %s\n", strerror(errno));
return -1;
}
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
printf("bind to netlink: %s\n", strerror(errno));
goto cleanup;
}
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_type = RTM_SETLINK;
req.nh.nlmsg_pid = 0;
req.nh.nlmsg_seq = ++seq;
req.ifinfo.ifi_family = AF_UNSPEC;
req.ifinfo.ifi_index = ifindex;
nla = (struct nlattr *)(((char *)&req)
+ NLMSG_ALIGN(req.nh.nlmsg_len));
nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
printf("send to netlink: %s\n", strerror(errno));
goto cleanup;
}
len = recv(sock, buf, sizeof(buf), 0);
if (len < 0) {
printf("recv from netlink: %s\n", strerror(errno));
goto cleanup;
}
for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != getpid()) {
printf("Wrong pid %d, expected %d\n",
nh->nlmsg_pid, getpid());
goto cleanup;
}
if (nh->nlmsg_seq != seq) {
printf("Wrong seq %d, expected %d\n",
nh->nlmsg_seq, seq);
goto cleanup;
}
switch (nh->nlmsg_type) {
case NLMSG_ERROR:
err = (struct nlmsgerr *)NLMSG_DATA(nh);
if (!err->error)
continue;
printf("nlmsg error %s\n", strerror(-err->error));
goto cleanup;
case NLMSG_DONE:
break;
}
}
ret = 0;
cleanup:
close(sock);
return ret;
}
static int ifindex;
static void int_exit(int sig)
{
set_link_xdp_fd(ifindex, -1);
exit(0);
}
/* simple per-protocol drop counter
*/
static void poll_stats(int interval)
{
unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
const unsigned int nr_keys = 256;
__u64 values[nr_cpus], prev[nr_keys][nr_cpus];
__u32 key;
int i;
memset(prev, 0, sizeof(prev));
while (1) {
sleep(interval);
for (key = 0; key < nr_keys; key++) {
__u64 sum = 0;
assert(bpf_lookup_elem(map_fd[0], &key, values) == 0);
for (i = 0; i < nr_cpus; i++)
sum += (values[i] - prev[key][i]);
if (sum)
printf("proto %u: %10llu pkt/s\n",
key, sum / interval);
memcpy(prev[key], values, sizeof(values));
}
}
}
int main(int ac, char **argv)
{
char filename[256];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (ac != 2) {
printf("usage: %s IFINDEX\n", argv[0]);
return 1;
}
ifindex = strtoul(argv[1], NULL, 0);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
if (!prog_fd[0]) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
signal(SIGINT, int_exit);
if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
printf("link set xdp fd failed\n");
return 1;
}
poll_stats(2);
return 0;
}