Merge branch 'Introduce bpf_ct_set_nat_info kfunc helper'

Lorenzo Bianconi says:

====================

Introduce bpf_ct_set_nat_info kfunc helper in order to set source and
destination nat addresses/ports in a new allocated ct entry not inserted
in the connection tracking table yet.
Introduce support for per-parameter trusted args.

Changes since v2:
- use int instead of a pointer for port in bpf_ct_set_nat_info signature
- modify KF_TRUSTED_ARGS definition in order to referenced pointer constraint
  just for PTR_TO_BTF_ID
- drop patch 2/4

Changes since v1:
- enable CONFIG_NF_NAT in tools/testing/selftests/bpf/config

Kumar Kartikeya Dwivedi (1):
  bpf: Tweak definition of KF_TRUSTED_ARGS
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2022-09-21 19:25:26 -07:00
commit 2d863b14fb
6 changed files with 110 additions and 17 deletions

View File

@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags.
--------------------------
The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
indicates that the all pointer arguments will always be refcounted, and have
their offset set to 0. It can be used to enforce that a pointer to a refcounted
object acquired from a kfunc or BPF helper is passed as an argument to this
kfunc without any modifications (e.g. pointer arithmetic) such that it is
trusted and points to the original object. This flag is often used for kfuncs
that operate (change some property, perform some operation) on an object that
was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to
ensure the integrity of the operation being performed on the expected object.
indicates that the all pointer arguments will always have a guaranteed lifetime,
and pointers to kernel objects are always passed to helpers in their unmodified
form (as obtained from acquire kfuncs).
It can be used to enforce that a pointer to a refcounted object acquired from a
kfunc or BPF helper is passed as an argument to this kfunc without any
modifications (e.g. pointer arithmetic) such that it is trusted and points to
the original object.
Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs,
but those can have a non-zero offset.
This flag is often used for kfuncs that operate (change some property, perform
some operation) on an object that was obtained using an acquire kfunc. Such
kfuncs need an unchanged pointer to ensure the integrity of the operation being
performed on the expected object.
2.4.6 KF_SLEEPABLE flag
-----------------------

View File

@ -6227,7 +6227,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
bool processing_call)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
bool rel = false, kptr_get = false, trusted_arg = false;
bool rel = false, kptr_get = false, trusted_args = false;
bool sleepable = false;
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
@ -6265,7 +6265,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
/* Only kfunc can be release func */
rel = kfunc_meta->flags & KF_RELEASE;
kptr_get = kfunc_meta->flags & KF_KPTR_GET;
trusted_arg = kfunc_meta->flags & KF_TRUSTED_ARGS;
trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS;
sleepable = kfunc_meta->flags & KF_SLEEPABLE;
}
@ -6276,6 +6276,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type = ARG_DONTCARE;
u32 regno = i + 1;
struct bpf_reg_state *reg = &regs[regno];
bool obj_ptr = false;
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
if (btf_type_is_scalar(t)) {
@ -6323,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
/* These register types have special constraints wrt ref_obj_id
* and offset checks. The rest of trusted args don't.
*/
obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID ||
reg2btf_ids[base_type(reg->type)];
/* Check if argument must be a referenced pointer, args + i has
* been verified to be a pointer (after skipping modifiers).
* PTR_TO_CTX is ok without having non-zero ref_obj_id.
*/
if (is_kfunc && trusted_arg && !reg->ref_obj_id) {
if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) {
bpf_log(log, "R%d must be referenced\n", regno);
return -EINVAL;
}
@ -6335,7 +6343,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
/* Trusted args have the same offset checks as release arguments */
if (trusted_arg || (rel && reg->ref_obj_id))
if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id))
arg_type |= OBJ_RELEASE;
ret = check_func_arg_reg_off(env, reg, regno, arg_type);
if (ret < 0)
@ -6435,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
reg->off, btf, ref_id,
trusted_arg || (rel && reg->ref_obj_id))) {
trusted_args || (rel && reg->ref_obj_id))) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i,
btf_type_str(ref_t), ref_tname,

View File

@ -17,6 +17,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
/* bpf_ct_opts - Options for CT lookup helpers
*
@ -137,7 +138,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
memset(&ct->proto, 0, sizeof(ct->proto));
__nf_ct_set_timeout(ct, timeout * HZ);
ct->status |= IPS_CONFIRMED;
out:
if (opts->netns_id >= 0)
@ -390,6 +390,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
nf_conntrack_free(nfct);
@ -475,6 +476,49 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
return nf_ct_change_status_common(nfct, status);
}
/* bpf_ct_set_nat_info - Set source or destination nat address
*
* Set source or destination nat address of the newly allocated
* nf_conn before insertion. This must be invoked for referenced
* PTR_TO_BTF_ID to nf_conn___init.
*
* Parameters:
* @nfct - Pointer to referenced nf_conn object, obtained using
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @addr - Nat source/destination address
* @port - Nat source/destination port. Non-positive values are
* interpreted as select a random port.
* @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
*/
int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
union nf_inet_addr *addr, int port,
enum nf_nat_manip_type manip)
{
#if ((IS_MODULE(CONFIG_NF_NAT) && IS_MODULE(CONFIG_NF_CONNTRACK)) || \
IS_BUILTIN(CONFIG_NF_NAT))
struct nf_conn *ct = (struct nf_conn *)nfct;
u16 proto = nf_ct_l3num(ct);
struct nf_nat_range2 range;
if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6)
return -EINVAL;
memset(&range, 0, sizeof(struct nf_nat_range2));
range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_addr = *addr;
range.max_addr = range.min_addr;
if (port > 0) {
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
range.min_proto.all = cpu_to_be16(port);
range.max_proto.all = range.min_proto.all;
}
return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
#else
return -EOPNOTSUPP;
#endif
}
__diag_pop()
BTF_SET8_START(nf_ct_kfunc_set)
@ -488,6 +532,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
BTF_SET8_END(nf_ct_kfunc_set)
static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {

View File

@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y

View File

@ -26,7 +26,10 @@ enum {
TEST_TC_BPF,
};
#define TIMEOUT_MS 3000
#define TIMEOUT_MS 3000
#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \
IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \
IPS_SRC_NAT | IPS_DST_NAT)
static int connect_to_server(int srv_fd)
{
@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode)
ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY,
"Test for ct status update ");
ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update ");
ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
end:
if (srv_client_fd != -1)
close(srv_client_fd);

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define EAFNOSUPPORT 97
#define EPROTO 71
@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT;
u32 test_delta_timeout = 0;
u32 test_status = 0;
u32 test_insert_lookup_mark = 0;
int test_snat_addr = -EINVAL;
int test_dnat_addr = -EINVAL;
__be32 saddr = 0;
__be16 sport = 0;
__be32 daddr = 0;
@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
int port, enum nf_nat_manip_type) __ksym;
static __always_inline void
nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
sizeof(opts_def));
if (ct) {
__u16 sport = bpf_get_prandom_u32();
__u16 dport = bpf_get_prandom_u32();
union nf_inet_addr saddr = {};
union nf_inet_addr daddr = {};
struct nf_conn *ct_ins;
bpf_ct_set_timeout(ct, 10000);
ct->mark = 77;
/* snat */
saddr.ip = bpf_get_prandom_u32();
bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC);
/* dnat */
daddr.ip = bpf_get_prandom_u32();
bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST);
ct_ins = bpf_ct_insert_entry(ct);
if (ct_ins) {
struct nf_conn *ct_lk;
@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
&opts_def, sizeof(opts_def));
if (ct_lk) {
struct nf_conntrack_tuple *tuple;
/* check snat and dnat addresses */
tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple;
if (tuple->dst.u3.ip == saddr.ip &&
tuple->dst.u.all == bpf_htons(sport))
test_snat_addr = 0;
if (tuple->src.u3.ip == daddr.ip &&
tuple->src.u.all == bpf_htons(dport))
test_dnat_addr = 0;
/* update ct entry timeout */
bpf_ct_change_timeout(ct_lk, 10000);
test_delta_timeout = ct_lk->timeout - bpf_jiffies64();