// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause // Copyright (c) 2020 Cloudflare #include #include #include #include #include #include #include #include #define IP4(a, b, c, d) \ bpf_htonl((((__u32)(a) & 0xffU) << 24) | \ (((__u32)(b) & 0xffU) << 16) | \ (((__u32)(c) & 0xffU) << 8) | \ (((__u32)(d) & 0xffU) << 0)) #define IP6(aaaa, bbbb, cccc, dddd) \ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } #define MAX_SOCKS 32 struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, MAX_SOCKS); __type(key, __u32); __type(value, __u64); } redir_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 2); __type(key, int); __type(value, int); } run_map SEC(".maps"); enum { PROG1 = 0, PROG2, }; enum { SERVER_A = 0, SERVER_B, }; /* Addressable key/value constants for convenience */ static const int KEY_PROG1 = PROG1; static const int KEY_PROG2 = PROG2; static const int PROG_DONE = 1; static const __u32 KEY_SERVER_A = SERVER_A; static const __u32 KEY_SERVER_B = SERVER_B; static const __u16 DST_PORT = 7007; /* Host byte order */ static const __u32 DST_IP4 = IP4(127, 0, 0, 1); static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); SEC("sk_lookup/lookup_pass") int lookup_pass(struct bpf_sk_lookup *ctx) { return SK_PASS; } SEC("sk_lookup/lookup_drop") int lookup_drop(struct bpf_sk_lookup *ctx) { return SK_DROP; } SEC("sk_reuseport/reuse_pass") int reuseport_pass(struct sk_reuseport_md *ctx) { return SK_PASS; } SEC("sk_reuseport/reuse_drop") int reuseport_drop(struct sk_reuseport_md *ctx) { return SK_DROP; } /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */ SEC("sk_lookup/redir_port") int redir_port(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; if (ctx->local_port != DST_PORT) return SK_PASS; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_PASS; err = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); return err ? 
SK_DROP : SK_PASS; } /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */ SEC("sk_lookup/redir_ip4") int redir_ip4(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; if (ctx->family != AF_INET) return SK_PASS; if (ctx->local_port != DST_PORT) return SK_PASS; if (ctx->local_ip4 != DST_IP4) return SK_PASS; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_PASS; err = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); return err ? SK_DROP : SK_PASS; } /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */ SEC("sk_lookup/redir_ip6") int redir_ip6(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; if (ctx->family != AF_INET6) return SK_PASS; if (ctx->local_port != DST_PORT) return SK_PASS; if (ctx->local_ip6[0] != DST_IP6[0] || ctx->local_ip6[1] != DST_IP6[1] || ctx->local_ip6[2] != DST_IP6[2] || ctx->local_ip6[3] != DST_IP6[3]) return SK_PASS; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_PASS; err = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); return err ? SK_DROP : SK_PASS; } SEC("sk_lookup/select_sock_a") int select_sock_a(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_PASS; err = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); return err ? SK_DROP : SK_PASS; } SEC("sk_lookup/select_sock_a_no_reuseport") int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_DROP; err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT); bpf_sk_release(sk); return err ? SK_DROP : SK_PASS; } SEC("sk_reuseport/select_sock_b") int select_sock_b(struct sk_reuseport_md *ctx) { __u32 key = KEY_SERVER_B; int err; err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0); return err ? SK_DROP : SK_PASS; } /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. 
*/
SEC("sk_lookup/sk_assign_eexist")
int sk_assign_eexist(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sock;
	int verdict = SK_DROP;
	int rc;

	/* First selection: SERVER_B, no flags. Must succeed. */
	sock = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
	if (!sock)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, sock, 0);
	/* The lookup reference is dropped whether or not the assign worked. */
	bpf_sk_release(sock);
	if (rc)
		return SK_DROP;

	/* Second selection without the replace flag must be refused. */
	sock = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!sock)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, sock, 0);
	if (rc != -EEXIST)
		bpf_printk("sk_assign returned %d, expected %d\n",
			   rc, -EEXIST);
	else
		verdict = SK_PASS; /* Success, redirect to KEY_SERVER_B */

	bpf_sk_release(sock);
	return verdict;
}

/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
SEC("sk_lookup/sk_assign_replace_flag")
int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
{
	struct bpf_sock *sock;
	int verdict = SK_DROP;
	int rc;

	/* First selection: SERVER_A, no flags. Must succeed. */
	sock = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
	if (!sock)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, sock, 0);
	bpf_sk_release(sock);
	if (rc)
		return SK_DROP;

	/* Override the selection with SERVER_B using the replace flag. */
	sock = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
	if (!sock)
		return SK_DROP;

	rc = bpf_sk_assign(ctx, sock, BPF_SK_LOOKUP_F_REPLACE);
	if (rc)
		bpf_printk("sk_assign returned %d, expected 0\n", rc);
	else
		verdict = SK_PASS; /* Success, redirect to KEY_SERVER_B */

	bpf_sk_release(sock);
	return verdict;
}

/* Check that bpf_sk_assign(sk=NULL) is accepted.
*/ SEC("sk_lookup/sk_assign_null") int sk_assign_null(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk = NULL; int err, ret; ret = SK_DROP; err = bpf_sk_assign(ctx, NULL, 0); if (err) { bpf_printk("sk_assign returned %d, expected 0\n", err); goto out; } sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); if (!sk) goto out; err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); if (err) { bpf_printk("sk_assign returned %d, expected 0\n", err); goto out; } if (ctx->sk != sk) goto out; err = bpf_sk_assign(ctx, NULL, 0); if (err != -EEXIST) goto out; err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); if (err) goto out; err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); if (err) goto out; ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ out: if (sk) bpf_sk_release(sk); return ret; } /* Check that selected sk is accessible through context. */ SEC("sk_lookup/access_ctx_sk") int access_ctx_sk(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk1 = NULL, *sk2 = NULL; int err, ret; ret = SK_DROP; /* Try accessing unassigned (NULL) ctx->sk field */ if (ctx->sk && ctx->sk->family != AF_INET) goto out; /* Assign a value to ctx->sk */ sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk1) goto out; err = bpf_sk_assign(ctx, sk1, 0); if (err) goto out; if (ctx->sk != sk1) goto out; /* Access ctx->sk fields */ if (ctx->sk->family != AF_INET || ctx->sk->type != SOCK_STREAM || ctx->sk->state != BPF_TCP_LISTEN) goto out; /* Reset selection */ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); if (err) goto out; if (ctx->sk) goto out; /* Assign another socket */ sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); if (!sk2) goto out; err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE); if (err) goto out; if (ctx->sk != sk2) goto out; /* Access reassigned ctx->sk fields */ if (ctx->sk->family != AF_INET || ctx->sk->type != SOCK_STREAM || ctx->sk->state != BPF_TCP_LISTEN) goto out; ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ out: if (sk1) 
bpf_sk_release(sk1); if (sk2) bpf_sk_release(sk2); return ret; } /* Check narrow loads from ctx fields that support them. * * Narrow loads of size >= target field size from a non-zero offset * are not covered because they give bogus results, that is the * verifier ignores the offset. */ SEC("sk_lookup/ctx_narrow_access") int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; __u16 *half; __u8 *byte; bool v4; v4 = (ctx->family == AF_INET); /* Narrow loads from family field */ byte = (__u8 *)&ctx->family; half = (__u16 *)&ctx->family; if (byte[0] != (v4 ? AF_INET : AF_INET6) || byte[1] != 0 || byte[2] != 0 || byte[3] != 0) return SK_DROP; if (half[0] != (v4 ? AF_INET : AF_INET6)) return SK_DROP; byte = (__u8 *)&ctx->protocol; if (byte[0] != IPPROTO_TCP || byte[1] != 0 || byte[2] != 0 || byte[3] != 0) return SK_DROP; half = (__u16 *)&ctx->protocol; if (half[0] != IPPROTO_TCP) return SK_DROP; /* Narrow loads from remote_port field. Expect non-0 value. */ byte = (__u8 *)&ctx->remote_port; if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) return SK_DROP; half = (__u16 *)&ctx->remote_port; if (half[0] == 0) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. 
*/ byte = (__u8 *)&ctx->local_port; if (byte[0] != ((DST_PORT >> 0) & 0xff) || byte[1] != ((DST_PORT >> 8) & 0xff) || byte[2] != 0 || byte[3] != 0) return SK_DROP; half = (__u16 *)&ctx->local_port; if (half[0] != DST_PORT) return SK_DROP; /* Narrow loads from IPv4 fields */ if (v4) { /* Expect non-0.0.0.0 in remote_ip4 */ byte = (__u8 *)&ctx->remote_ip4; if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) return SK_DROP; half = (__u16 *)&ctx->remote_ip4; if (half[0] == 0 && half[1] == 0) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ byte = (__u8 *)&ctx->local_ip4; if (byte[0] != ((DST_IP4 >> 0) & 0xff) || byte[1] != ((DST_IP4 >> 8) & 0xff) || byte[2] != ((DST_IP4 >> 16) & 0xff) || byte[3] != ((DST_IP4 >> 24) & 0xff)) return SK_DROP; half = (__u16 *)&ctx->local_ip4; if (half[0] != ((DST_IP4 >> 0) & 0xffff) || half[1] != ((DST_IP4 >> 16) & 0xffff)) return SK_DROP; } else { /* Expect 0.0.0.0 IPs when family != AF_INET */ byte = (__u8 *)&ctx->remote_ip4; if (byte[0] != 0 || byte[1] != 0 && byte[2] != 0 || byte[3] != 0) return SK_DROP; half = (__u16 *)&ctx->remote_ip4; if (half[0] != 0 || half[1] != 0) return SK_DROP; byte = (__u8 *)&ctx->local_ip4; if (byte[0] != 0 || byte[1] != 0 && byte[2] != 0 || byte[3] != 0) return SK_DROP; half = (__u16 *)&ctx->local_ip4; if (half[0] != 0 || half[1] != 0) return SK_DROP; } /* Narrow loads from IPv6 fields */ if (!v4) { /* Expenct non-:: IP in remote_ip6 */ byte = (__u8 *)&ctx->remote_ip6; if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0 && byte[4] == 0 && byte[5] == 0 && byte[6] == 0 && byte[7] == 0 && byte[8] == 0 && byte[9] == 0 && byte[10] == 0 && byte[11] == 0 && byte[12] == 0 && byte[13] == 0 && byte[14] == 0 && byte[15] == 0) return SK_DROP; half = (__u16 *)&ctx->remote_ip6; if (half[0] == 0 && half[1] == 0 && half[2] == 0 && half[3] == 0 && half[4] == 0 && half[5] == 0 && half[6] == 0 && half[7] == 0) return SK_DROP; /* Expect DST_IP6 in local_ip6 */ byte = (__u8 *)&ctx->local_ip6; if 
(byte[0] != ((DST_IP6[0] >> 0) & 0xff) || byte[1] != ((DST_IP6[0] >> 8) & 0xff) || byte[2] != ((DST_IP6[0] >> 16) & 0xff) || byte[3] != ((DST_IP6[0] >> 24) & 0xff) || byte[4] != ((DST_IP6[1] >> 0) & 0xff) || byte[5] != ((DST_IP6[1] >> 8) & 0xff) || byte[6] != ((DST_IP6[1] >> 16) & 0xff) || byte[7] != ((DST_IP6[1] >> 24) & 0xff) || byte[8] != ((DST_IP6[2] >> 0) & 0xff) || byte[9] != ((DST_IP6[2] >> 8) & 0xff) || byte[10] != ((DST_IP6[2] >> 16) & 0xff) || byte[11] != ((DST_IP6[2] >> 24) & 0xff) || byte[12] != ((DST_IP6[3] >> 0) & 0xff) || byte[13] != ((DST_IP6[3] >> 8) & 0xff) || byte[14] != ((DST_IP6[3] >> 16) & 0xff) || byte[15] != ((DST_IP6[3] >> 24) & 0xff)) return SK_DROP; half = (__u16 *)&ctx->local_ip6; if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || half[1] != ((DST_IP6[0] >> 16) & 0xffff) || half[2] != ((DST_IP6[1] >> 0) & 0xffff) || half[3] != ((DST_IP6[1] >> 16) & 0xffff) || half[4] != ((DST_IP6[2] >> 0) & 0xffff) || half[5] != ((DST_IP6[2] >> 16) & 0xffff) || half[6] != ((DST_IP6[3] >> 0) & 0xffff) || half[7] != ((DST_IP6[3] >> 16) & 0xffff)) return SK_DROP; } else { /* Expect :: IPs when family != AF_INET6 */ byte = (__u8 *)&ctx->remote_ip6; if (byte[0] != 0 || byte[1] != 0 || byte[2] != 0 || byte[3] != 0 || byte[4] != 0 || byte[5] != 0 || byte[6] != 0 || byte[7] != 0 || byte[8] != 0 || byte[9] != 0 || byte[10] != 0 || byte[11] != 0 || byte[12] != 0 || byte[13] != 0 || byte[14] != 0 || byte[15] != 0) return SK_DROP; half = (__u16 *)&ctx->remote_ip6; if (half[0] != 0 || half[1] != 0 || half[2] != 0 || half[3] != 0 || half[4] != 0 || half[5] != 0 || half[6] != 0 || half[7] != 0) return SK_DROP; byte = (__u8 *)&ctx->local_ip6; if (byte[0] != 0 || byte[1] != 0 || byte[2] != 0 || byte[3] != 0 || byte[4] != 0 || byte[5] != 0 || byte[6] != 0 || byte[7] != 0 || byte[8] != 0 || byte[9] != 0 || byte[10] != 0 || byte[11] != 0 || byte[12] != 0 || byte[13] != 0 || byte[14] != 0 || byte[15] != 0) return SK_DROP; half = (__u16 *)&ctx->local_ip6; if (half[0] != 0 || 
half[1] != 0 || half[2] != 0 || half[3] != 0 || half[4] != 0 || half[5] != 0 || half[6] != 0 || half[7] != 0) return SK_DROP; } /* Success, redirect to KEY_SERVER_B */ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); if (sk) { bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); } return SK_PASS; } /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */ SEC("sk_lookup/sk_assign_esocknosupport") int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, ret; ret = SK_DROP; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) goto out; err = bpf_sk_assign(ctx, sk, 0); if (err != -ESOCKTNOSUPPORT) { bpf_printk("sk_assign returned %d, expected %d\n", err, -ESOCKTNOSUPPORT); goto out; } ret = SK_PASS; /* Success, pass to regular lookup */ out: if (sk) bpf_sk_release(sk); return ret; } SEC("sk_lookup/multi_prog_pass1") int multi_prog_pass1(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_PASS; } SEC("sk_lookup/multi_prog_pass2") int multi_prog_pass2(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); return SK_PASS; } SEC("sk_lookup/multi_prog_drop1") int multi_prog_drop1(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_DROP; } SEC("sk_lookup/multi_prog_drop2") int multi_prog_drop2(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); return SK_DROP; } static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err; sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); if (!sk) return SK_DROP; err = bpf_sk_assign(ctx, sk, 0); bpf_sk_release(sk); if (err) return SK_DROP; return SK_PASS; } SEC("sk_lookup/multi_prog_redir1") int multi_prog_redir1(struct bpf_sk_lookup *ctx) { int ret; ret = select_server_a(ctx); bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_PASS; } 
/*
 * Try to select SERVER_A, mark PROG2 as run, and return SK_PASS.
 *
 * The verdict does not depend on whether the selection succeeded, so the
 * helper's result is explicitly discarded.
 */
SEC("sk_lookup/multi_prog_redir2")
int multi_prog_redir2(struct bpf_sk_lookup *ctx)
{
	(void)select_server_a(ctx);
	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
	return SK_PASS;
}

/* License string and version number, placed in their dedicated ELF sections. */
char _license[] SEC("license") = "Dual BSD/GPL";
__u32 _version SEC("version") = 1;