linux/tools/testing/selftests/bpf/test_progs.h
Lorenz Bauer 234589012b selftests/bpf: Add cls_redirect classifier
cls_redirect is a TC clsact based replacement for the glb-redirect iptables
module available at [1]. It enables what GitHub calls "second chance"
flows [2], similarly proposed by the Beamer paper [3]. In contrast to
glb-redirect, it also supports migrating UDP flows as long as connected
sockets are used. cls_redirect is in production at Cloudflare, as part of
our own L4 load balancer.

We have modified the encapsulation format slightly from glb-redirect:
glbgue_chained_routing.private_data_type has been repurposed to form a
version field and several flags. Both have been arranged in a way that
a private_data_type value of zero matches the current glb-redirect
behaviour. This means that cls_redirect will understand packets in
glb-redirect format, but not vice versa.

The test suite only covers basic features. For example, cls_redirect will
correctly forward path MTU discovery packets, but this is not exercised.
It is also possible to switch the encapsulation format to GRE on the last
hop, which is also not tested.

There are two major distinctions from glb-redirect: first, cls_redirect
relies on receiving encapsulated packets directly from a router. This is
because we don't have access to the neighbour tables from BPF, yet. See
forward_to_next_hop for details. Second, cls_redirect performs decapsulation
instead of using separate ipip and sit tunnel devices. This
avoids issues with the sit tunnel [4] and makes deploying the classifier
easier: decapsulated packets appear on the same interface, so existing
firewall rules continue to work as expected.

The code base started it's life on v4.19, so there are most likely still
hold overs from old workarounds. In no particular order:

- The function buf_off is required to defeat a clang optimization
  that leads to the verifier rejecting the program due to pointer
  arithmetic in the wrong order.

- The function pkt_parse_ipv6 is force inlined, because it would
  otherwise be rejected due to returning a pointer to stack memory.

- The functions fill_tuple and classify_tcp contain kludges, because
  we've run out of function arguments.

- The logic in general is rather nested, due to verifier restrictions.
  I think this is either because the verifier loses track of constants
  on the stack, or because it can't track enum like variables.

1: https://github.com/github/glb-director/tree/master/src/glb-redirect
2: https://github.com/github/glb-director/blob/master/docs/development/second-chance-design.md
3: https://www.usenix.org/conference/nsdi18/presentation/olteanu
4: https://github.com/github/glb-director/issues/64

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200424185556.7358-2-lmb@cloudflare.com
2020-04-26 10:00:36 -07:00

168 lines
3.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
#include <signal.h>
#include <linux/types.h>
typedef __u16 __sum16;
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <netinet/tcp.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <linux/socket.h>
#include <linux/unistd.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
#include <fcntl.h>
#include <pthread.h>
#include <linux/bpf.h>
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "test_iptunnel_common.h"
#include "bpf_util.h"
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "flow_dissector_load.h"
enum verbosity {
VERBOSE_NONE,
VERBOSE_NORMAL,
VERBOSE_VERY,
VERBOSE_SUPER,
};
struct str_set {
const char **strs;
int cnt;
};
struct test_selector {
struct str_set whitelist;
struct str_set blacklist;
bool *num_set;
int num_set_len;
};
struct test_env {
struct test_selector test_selector;
struct test_selector subtest_selector;
bool verifier_stats;
enum verbosity verbosity;
bool jit_enabled;
struct prog_test_def *test;
FILE *stdout;
FILE *stderr;
char *log_buf;
size_t log_cnt;
int nr_cpus;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
int skip_cnt; /* skipped tests */
};
extern struct test_env env;
extern void test__force_log();
extern bool test__start_subtest(const char *name);
extern void test__skip(void);
extern void test__fail(void);
extern int test__join_cgroup(const char *path);
#define MAGIC_BYTES 123
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
struct iphdr iph;
struct tcphdr tcp;
} __packed;
extern struct ipv4_packet pkt_v4;
/* ipv6 test vector */
struct ipv6_packet {
struct ethhdr eth;
struct ipv6hdr iph;
struct tcphdr tcp;
} __packed;
extern struct ipv6_packet pkt_v6;
#define PRINT_FAIL(format...) \
({ \
test__fail(); \
fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__); \
fprintf(stdout, ##format); \
})
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \
fprintf(stdout, ##format); \
} else { \
fprintf(stdout, "%s:PASS:%s %d nsec\n", \
__func__, tag, duration); \
} \
errno = __save_errno; \
__ret; \
})
#define CHECK_FAIL(condition) ({ \
int __ret = !!(condition); \
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \
} \
errno = __save_errno; \
__ret; \
})
#define CHECK(condition, tag, format...) \
_CHECK(condition, tag, duration, format)
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
int extract_build_id(char *build_id, size_t size);
void *spin_lock_thread(void *arg);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
#elif defined(__s390x__)
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif