
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-02-16

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) numerous libbpf API improvements, from Andrii, Andrey, Yonghong.

2) test all bpf progs in alu32 mode, from Jiong.

3) skb->sk access and bpf_sk_fullsock(), bpf_tcp_sock() helpers, from Martin.

4) support for IP encap in lwt bpf progs, from Peter.

5) remove XDP_QUERY_XSK_UMEM dead code, from Jan.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
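
Item 3 above is the socket-introspection work spread across the diff below:
__sk_buff gains an sk field, and the new bpf_sk_fullsock() and bpf_tcp_sock()
helpers turn that (possibly NULL) sock_common pointer into a full socket or a
read-only tcp_sock view. A minimal sketch of a cgroup-skb consumer follows;
the section name, the hand-rolled helper declarations and the retransmit
threshold are illustrative assumptions, not part of this series.

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/bpf.h>

static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
        (void *) BPF_FUNC_sk_fullsock;
static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
        (void *) BPF_FUNC_tcp_sock;

__attribute__((section("cgroup_skb/egress"), used))
int observe_tcp(struct __sk_buff *skb)
{
        struct bpf_tcp_sock *tp;
        struct bpf_sock *sk;

        sk = skb->sk;                   /* PTR_TO_SOCK_COMMON_OR_NULL */
        if (!sk)                        /* the verifier enforces this check */
                return 1;

        sk = bpf_sk_fullsock(sk);       /* full socket, or NULL */
        if (!sk)
                return 1;

        tp = bpf_tcp_sock(sk);          /* NULL unless a TCP socket */
        if (!tp)
                return 1;

        /* Toy policy: stop egress once the connection has seen an
         * (arbitrary, placeholder) number of total retransmits.
         */
        if (tp->total_retrans > 100)
                return 0;               /* drop */

        return 1;                       /* allow */
}

char _license[] __attribute__((section("license"), used)) = "GPL";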
David S. Miller 2019-02-16 22:56:34 -08:00
commit 885e631959
92 changed files with 2574 additions and 483 deletions

View File

@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev,
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
case XDP_QUERY_XSK_UMEM:
return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
xdp->xsk.queue_id);
case XDP_SETUP_XSK_UMEM:
return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);

View File

@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
return 0;
}
/**
* i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
* @vsi: Current VSI
* @umem: UMEM associated to the ring, if any
* @qid: Rx ring to associate UMEM to
*
* This function will store, if any, the UMEM associated to certain ring.
*
* Returns 0 on success, <0 on failure
**/
int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
u16 qid)
{
struct net_device *netdev = vsi->netdev;
struct xdp_umem *queried_umem;
if (vsi->type != I40E_VSI_MAIN)
return -EINVAL;
queried_umem = xdp_get_umem_from_qid(netdev, qid);
if (!queried_umem)
return -EINVAL;
*umem = queried_umem;
return 0;
}
/**
* i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
* @vsi: Current VSI

View File

@ -10,8 +10,6 @@ struct zero_copy_allocator;
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
u16 qid);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid);
void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);

View File

@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
xdp->prog_id = adapter->xdp_prog ?
adapter->xdp_prog->aux->id : 0;
return 0;
case XDP_QUERY_XSK_UMEM:
return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem,
xdp->xsk.queue_id);
case XDP_SETUP_XSK_UMEM:
return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
xdp->xsk.queue_id);

View File

@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
struct ixgbe_ring *ring);
int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
u16 qid);
int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
u16 qid);

View File

@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
return 0;
}
int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
u16 qid)
{
if (qid >= adapter->num_rx_queues)
return -EINVAL;
if (adapter->xsk_umems) {
if (qid >= adapter->num_xsk_umems)
return -EINVAL;
*umem = adapter->xsk_umems[qid];
return 0;
}
*umem = NULL;
return 0;
}
int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
u16 qid)
{

View File

@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app)
app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
}
bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops);
bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf);
err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
if (err)
goto err_free_neutral_maps;

View File

@ -185,8 +185,6 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
{
struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
struct nfp_app *app = nn->app;
struct nfp_prog *nfp_prog;
int ret;
@ -197,7 +195,7 @@ static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
INIT_LIST_HEAD(&nfp_prog->insns);
nfp_prog->type = prog->type;
nfp_prog->bpf = app->priv;
nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev);
ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
if (ret)

View File

@ -248,7 +248,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
{
struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev);
struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
if (!ns->bpf_bind_accept)
return -EOPNOTSUPP;
@ -589,7 +589,8 @@ int nsim_bpf_init(struct netdevsim *ns)
if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
return -ENOMEM;
ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops);
ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops,
ns);
err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
if (err)
return err;

View File

@ -194,6 +194,7 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
};
/* type of values returned from helper functions */
@ -203,6 +204,7 @@ enum bpf_return_type {
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@ -256,6 +258,10 @@ enum bpf_reg_type {
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@ -767,8 +773,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
struct bpf_offload_dev *
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops);
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv);
void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev);
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
struct net_device *netdev);
void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
@ -920,6 +927,9 @@ void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
#if defined(CONFIG_NET)
bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info);
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@ -928,6 +938,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog,
u32 *target_size);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}
static inline bool bpf_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
@ -944,4 +960,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif
#ifdef CONFIG_INET
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
#else
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}
static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
return 0;
}
#endif /* CONFIG_INET */
#endif /* _LINUX_BPF_H */

View File

@ -868,7 +868,6 @@ enum bpf_netdev_command {
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
XDP_QUERY_XSK_UMEM,
XDP_SETUP_XSK_UMEM,
};
@ -895,10 +894,10 @@ struct netdev_bpf {
struct {
struct bpf_offloaded_map *offmap;
};
/* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
/* XDP_SETUP_XSK_UMEM */
struct {
struct xdp_umem *umem; /* out for query*/
u16 queue_id; /* in for query */
struct xdp_umem *umem;
u16 queue_id;
} xsk;
};
};

View File

@ -248,6 +248,7 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6);
int (*ipv6_route_input)(struct sk_buff *skb);
struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
struct fib6_info *(*fib6_lookup)(struct net *net, int oif,

View File

@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_xmit(struct sk_buff *skb);
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
bool ingress);
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
{

View File

@ -2016,6 +2016,19 @@ union bpf_attr {
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
* **BPF_LWT_ENCAP_IP**
* IP encapsulation (GRE/GUE/IPIP/etc). The outer header
* must be IPv4 or IPv6, followed by zero or more
* additional headers, up to LWT_BPF_MAX_HEADROOM total
* bytes in all prepended headers. Please note that
* if skb_is_gso(skb) is true, no more than two headers
* can be prepended, and the inner header, if present,
* should be either GRE or UDP/GUE.
*
* BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of
* type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
* by bpf programs of types BPF_PROG_TYPE_LWT_IN and
* BPF_PROG_TYPE_LWT_XMIT.
*
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
@ -2329,6 +2342,23 @@ union bpf_attr {
* "**y**".
* Return
* 0
*
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_sock** pointer such
* that all the fields in bpf_sock can be accessed.
* Return
* A **struct bpf_sock** pointer on success, or NULL in
* case of failure.
*
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_tcp_sock** pointer from a
* **struct bpf_sock** pointer.
*
* Return
* A **struct bpf_tcp_sock** pointer on success, or NULL in
* case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2425,7 +2455,9 @@ union bpf_attr {
FN(msg_pop_data), \
FN(rc_pointer_rel), \
FN(spin_lock), \
FN(spin_unlock),
FN(spin_unlock), \
FN(sk_fullsock), \
FN(tcp_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -2498,7 +2530,8 @@ enum bpf_hdr_start_off {
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
BPF_LWT_ENCAP_SEG6_INLINE,
BPF_LWT_ENCAP_IP,
};
#define __bpf_md_ptr(type, name) \
@ -2545,6 +2578,7 @@ struct __sk_buff {
__u64 tstamp;
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
};
struct bpf_tunnel_key {
@ -2586,7 +2620,15 @@ enum bpf_ret_code {
BPF_DROP = 2,
/* 3-6 reserved */
BPF_REDIRECT = 7,
/* >127 are reserved for prog type specific return codes */
/* >127 are reserved for prog type specific return codes.
*
* BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
* BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
* changed and should be routed based on its new L3 header.
* (This is an L3 redirect, as opposed to L2 redirect
* represented by BPF_REDIRECT above).
*/
BPF_LWT_REROUTE = 128,
};
struct bpf_sock {
@ -2596,14 +2638,52 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
/* IP address also allows 1 and 2 bytes access */
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
__u32 dst_port; /* network byte order */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
};
struct bpf_tcp_sock {
__u32 snd_cwnd; /* Sending congestion window */
__u32 srtt_us; /* smoothed round trip time << 3 in usecs */
__u32 rtt_min;
__u32 snd_ssthresh; /* Slow start size threshold */
__u32 rcv_nxt; /* What we want to receive next */
__u32 snd_nxt; /* Next sequence we send */
__u32 snd_una; /* First byte we want an ack for */
__u32 mss_cache; /* Cached effective mss, not including SACKS */
__u32 ecn_flags; /* ECN status bits. */
__u32 rate_delivered; /* saved rate sample: packets delivered */
__u32 rate_interval_us; /* saved rate sample: time elapsed */
__u32 packets_out; /* Packets which are "in flight" */
__u32 retrans_out; /* Retransmitted packets out */
__u32 total_retrans; /* Total retransmits for entire connection */
__u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
* total number of segments in.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
__u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
* total number of data segments in.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
__u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
* The total number of segments sent.
*/
__u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
* total number of data segments sent.
*/
__u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */
__u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
* sum(delta(rcv_nxt)), or how many bytes
* were acked.
*/
__u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
};
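
The UAPI hunk above also documents the new BPF_LWT_ENCAP_IP mode and the
BPF_LWT_REROUTE return code added for item 4. A minimal sketch of an LWT-xmit
program using them might look as follows; the tunnel endpoints, byte-swap
macros and section name are illustrative assumptions, not taken from this
commit.

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>

#define bpf_htons(x) __builtin_bswap16(x)       /* assumes a little-endian host */
#define bpf_htonl(x) __builtin_bswap32(x)

static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type,
                                 void *hdr, __u32 len) =
        (void *) BPF_FUNC_lwt_push_encap;

__attribute__((section("lwt_xmit"), used))
int encap_ipip(struct __sk_buff *skb)
{
        struct iphdr hdr = {};

        hdr.version  = 4;
        hdr.ihl      = 5;
        hdr.ttl      = 64;
        hdr.protocol = IPPROTO_IPIP;    /* inner packet is IPv4 */
        hdr.tot_len  = bpf_htons(skb->len + sizeof(hdr));
        hdr.saddr    = bpf_htonl(0x0a000001);   /* 10.0.0.1, placeholder */
        hdr.daddr    = bpf_htonl(0x0a000002);   /* 10.0.0.2, placeholder */
        /* hdr.check stays 0: bpf_lwt_push_ip_encap() computes it */

        if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
                return BPF_DROP;

        return BPF_LWT_REROUTE; /* outer L3 header changed, re-route */
}

char _license[] __attribute__((section("license"), used)) = "GPL";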

View File

@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock);
struct bpf_offload_dev {
const struct bpf_prog_offload_ops *ops;
struct list_head netdevs;
void *priv;
};
struct bpf_offload_netdev {
@ -669,7 +670,7 @@ unlock:
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
struct bpf_offload_dev *
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
{
struct bpf_offload_dev *offdev;
int err;
@ -688,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
return ERR_PTR(-ENOMEM);
offdev->ops = ops;
offdev->priv = priv;
INIT_LIST_HEAD(&offdev->netdevs);
return offdev;
@ -700,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
kfree(offdev);
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
{
return offdev->priv;
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);

View File

@ -331,10 +331,19 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
type == PTR_TO_PACKET_META;
}
static bool type_is_sk_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET ||
type == PTR_TO_SOCK_COMMON ||
type == PTR_TO_TCP_SOCK;
}
static bool reg_type_may_be_null(enum bpf_reg_type type)
{
return type == PTR_TO_MAP_VALUE_OR_NULL ||
type == PTR_TO_SOCKET_OR_NULL;
type == PTR_TO_SOCKET_OR_NULL ||
type == PTR_TO_SOCK_COMMON_OR_NULL ||
type == PTR_TO_TCP_SOCK_OR_NULL;
}
static bool type_is_refcounted(enum bpf_reg_type type)
@ -377,6 +386,12 @@ static bool is_release_function(enum bpf_func_id func_id)
return func_id == BPF_FUNC_sk_release;
}
static bool is_acquire_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp;
}
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@ -392,6 +407,10 @@ static const char * const reg_type_str[] = {
[PTR_TO_FLOW_KEYS] = "flow_keys",
[PTR_TO_SOCKET] = "sock",
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
[PTR_TO_SOCK_COMMON] = "sock_common",
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
};
static char slot_type_char[] = {
@ -618,13 +637,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
}
/* release function corresponding to acquire_reference_state(). Idempotent. */
static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
int i, last_idx;
if (!ptr_id)
return -EFAULT;
last_idx = state->acquired_refs - 1;
for (i = 0; i < state->acquired_refs; i++) {
if (state->refs[i].id == ptr_id) {
@ -636,21 +652,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
return 0;
}
}
return -EFAULT;
}
/* variation on the above for cases where we expect that there must be an
* outstanding reference for the specified ptr_id.
*/
static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
{
struct bpf_func_state *state = cur_func(env);
int err;
err = __release_reference_state(state, ptr_id);
if (WARN_ON_ONCE(err != 0))
verbose(env, "verifier internal error: can't release reference\n");
return err;
return -EINVAL;
}
static int transfer_reference_state(struct bpf_func_state *dst,
@ -1209,6 +1211,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case CONST_PTR_TO_MAP:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
return true;
default:
return false;
@ -1640,12 +1646,14 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
return 0;
}
static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, enum bpf_access_type t)
static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
u32 regno, int off, int size,
enum bpf_access_type t)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = &regs[regno];
struct bpf_insn_access_aux info;
struct bpf_insn_access_aux info = {};
bool valid;
if (reg->smin_value < 0) {
verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@ -1653,13 +1661,31 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
return -EACCES;
}
if (!bpf_sock_is_valid_access(off, size, t, &info)) {
verbose(env, "invalid bpf_sock access off=%d size=%d\n",
off, size);
return -EACCES;
switch (reg->type) {
case PTR_TO_SOCK_COMMON:
valid = bpf_sock_common_is_valid_access(off, size, t, &info);
break;
case PTR_TO_SOCKET:
valid = bpf_sock_is_valid_access(off, size, t, &info);
break;
case PTR_TO_TCP_SOCK:
valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
break;
default:
valid = false;
}
return 0;
if (valid) {
env->insn_aux_data[insn_idx].ctx_field_size =
info.ctx_field_size;
return 0;
}
verbose(env, "R%d invalid %s access off=%d size=%d\n",
regno, reg_type_str[reg->type], off, size);
return -EACCES;
}
static bool __is_pointer_value(bool allow_ptr_leaks,
@ -1685,8 +1711,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
const struct bpf_reg_state *reg = reg_state(env, regno);
return reg->type == PTR_TO_CTX ||
reg->type == PTR_TO_SOCKET;
return reg->type == PTR_TO_CTX;
}
static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
{
const struct bpf_reg_state *reg = reg_state(env, regno);
return type_is_sk_pointer(reg->type);
}
static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@ -1797,6 +1829,12 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_SOCKET:
pointer_desc = "sock ";
break;
case PTR_TO_SOCK_COMMON:
pointer_desc = "sock_common ";
break;
case PTR_TO_TCP_SOCK:
pointer_desc = "tcp_sock ";
break;
default:
break;
}
@ -2000,11 +2038,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* PTR_TO_PACKET[_META,_END]. In the latter
* case, we know the offset is zero.
*/
if (reg_type == SCALAR_VALUE)
if (reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, value_regno);
else
} else {
mark_reg_known_zero(env, regs,
value_regno);
if (reg_type_may_be_null(reg_type))
regs[value_regno].id = ++env->id_gen;
}
regs[value_regno].type = reg_type;
}
@ -2050,12 +2091,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_flow_keys_access(env, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_SOCKET) {
} else if (type_is_sk_pointer(reg->type)) {
if (t == BPF_WRITE) {
verbose(env, "cannot write into socket\n");
verbose(env, "R%d cannot write into %s\n",
regno, reg_type_str[reg->type]);
return -EACCES;
}
err = check_sock_access(env, regno, off, size, t);
err = check_sock_access(env, insn_idx, regno, off, size, t);
if (!err && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else {
@ -2099,7 +2141,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg)) {
is_flow_key_reg(env, insn->dst_reg) ||
is_sk_reg(env, insn->dst_reg)) {
verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str[reg_state(env, insn->dst_reg)->type]);
@ -2366,6 +2409,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
expected_type = PTR_TO_SOCK_COMMON;
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type))
goto err_type;
} else if (arg_type == ARG_PTR_TO_SOCKET) {
expected_type = PTR_TO_SOCKET;
if (type != expected_type)
@ -2780,7 +2828,7 @@ static int release_reference(struct bpf_verifier_env *env,
for (i = 0; i <= vstate->curframe; i++)
release_reg_references(env, vstate->frame[i], meta->ptr_id);
return release_reference_state(env, meta->ptr_id);
return release_reference_state(cur_func(env), meta->ptr_id);
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@ -3046,8 +3094,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
} else if (is_release_function(func_id)) {
err = release_reference(env, &meta);
if (err)
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
return err;
}
}
regs = cur_regs(env);
@ -3096,12 +3147,23 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].id = ++env->id_gen;
}
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
regs[BPF_REG_0].id = id;
if (is_acquire_function(func_id)) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
return id;
/* For release_reference() */
regs[BPF_REG_0].id = id;
} else {
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen;
}
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
@ -3361,6 +3423,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
@ -4594,6 +4660,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
}
} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
reg->type = PTR_TO_SOCKET;
} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
reg->type = PTR_TO_SOCK_COMMON;
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK;
}
if (is_null || !(reg_is_refcounted(reg) ||
reg_may_point_to_spin_lock(reg))) {
@ -4618,7 +4688,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
int i, j;
if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
__release_reference_state(state, id);
release_reference_state(state, id);
for (i = 0; i < MAX_BPF_REG; i++)
mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@ -5787,6 +5857,10 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case PTR_TO_FLOW_KEYS:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
@ -6107,6 +6181,10 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_CTX:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
return false;
default:
return true;
@ -7109,8 +7187,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
convert_ctx_access = ops->convert_ctx_access;
break;
case PTR_TO_SOCKET:
case PTR_TO_SOCK_COMMON:
convert_ctx_access = bpf_sock_convert_ctx_access;
break;
case PTR_TO_TCP_SOCK:
convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
break;
default:
continue;
}

View File

@ -403,7 +403,7 @@ config LWTUNNEL
config LWTUNNEL_BPF
bool "Execute BPF program as route nexthop action"
depends on LWTUNNEL
depends on LWTUNNEL && INET
default y if LWTUNNEL=y
---help---
Allows to run BPF programs as a nexthop action following a route

View File

@ -73,6 +73,7 @@
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@ -1793,6 +1794,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
sk = sk_to_full_sk(sk);
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}
static const struct bpf_func_proto bpf_sk_fullsock_proto = {
.func = bpf_sk_fullsock,
.gpl_only = false,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
static inline int sk_skb_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
@ -4803,7 +4818,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
}
#endif /* CONFIG_IPV6_SEG6_BPF */
BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
bool ingress)
{
return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
}
#endif
BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
u32, len)
{
switch (type) {
@ -4811,14 +4834,41 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
case BPF_LWT_ENCAP_SEG6:
case BPF_LWT_ENCAP_SEG6_INLINE:
return bpf_push_seg6_encap(skb, type, hdr, len);
#endif
#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
case BPF_LWT_ENCAP_IP:
return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
#endif
default:
return -EINVAL;
}
}
static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
.func = bpf_lwt_push_encap,
BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
void *, hdr, u32, len)
{
switch (type) {
#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
case BPF_LWT_ENCAP_IP:
return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
#endif
default:
return -EINVAL;
}
}
static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
.func = bpf_lwt_in_push_encap,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
.func = bpf_lwt_xmit_push_encap,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@ -5018,6 +5068,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
};
#endif /* CONFIG_IPV6_SEG6_BPF */
#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \
do { \
switch (si->off) { \
case offsetof(md_type, snd_cwnd): \
CONVERT(snd_cwnd); break; \
case offsetof(md_type, srtt_us): \
CONVERT(srtt_us); break; \
case offsetof(md_type, snd_ssthresh): \
CONVERT(snd_ssthresh); break; \
case offsetof(md_type, rcv_nxt): \
CONVERT(rcv_nxt); break; \
case offsetof(md_type, snd_nxt): \
CONVERT(snd_nxt); break; \
case offsetof(md_type, snd_una): \
CONVERT(snd_una); break; \
case offsetof(md_type, mss_cache): \
CONVERT(mss_cache); break; \
case offsetof(md_type, ecn_flags): \
CONVERT(ecn_flags); break; \
case offsetof(md_type, rate_delivered): \
CONVERT(rate_delivered); break; \
case offsetof(md_type, rate_interval_us): \
CONVERT(rate_interval_us); break; \
case offsetof(md_type, packets_out): \
CONVERT(packets_out); break; \
case offsetof(md_type, retrans_out): \
CONVERT(retrans_out); break; \
case offsetof(md_type, total_retrans): \
CONVERT(total_retrans); break; \
case offsetof(md_type, segs_in): \
CONVERT(segs_in); break; \
case offsetof(md_type, data_segs_in): \
CONVERT(data_segs_in); break; \
case offsetof(md_type, segs_out): \
CONVERT(segs_out); break; \
case offsetof(md_type, data_segs_out): \
CONVERT(data_segs_out); break; \
case offsetof(md_type, lost_out): \
CONVERT(lost_out); break; \
case offsetof(md_type, sacked_out): \
CONVERT(sacked_out); break; \
case offsetof(md_type, bytes_received): \
CONVERT(bytes_received); break; \
case offsetof(md_type, bytes_acked): \
CONVERT(bytes_acked); break; \
} \
} while (0)
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
int dif, int sdif, u8 family, u8 proto)
@ -5255,6 +5353,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
return false;
if (off % size != 0)
return false;
switch (off) {
case offsetof(struct bpf_tcp_sock, bytes_received):
case offsetof(struct bpf_tcp_sock, bytes_acked):
return size == sizeof(__u64);
default:
return size == sizeof(__u32);
}
}
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
do { \
BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct tcp_sock, FIELD)); \
} while (0)
CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
BPF_TCP_SOCK_GET_COMMON);
if (insn > insn_buf)
return insn - insn_buf;
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct tcp_sock, rtt_min) +
offsetof(struct minmax_sample, v));
break;
}
return insn - insn_buf;
}
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
sk = sk_to_full_sk(sk);
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk;
return (unsigned long)NULL;
}
static const struct bpf_func_proto bpf_tcp_sock_proto = {
.func = bpf_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@ -5284,7 +5455,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_lwt_seg6_adjust_srh ||
func == bpf_lwt_seg6_action ||
#endif
func == bpf_lwt_push_encap)
func == bpf_lwt_in_push_encap ||
func == bpf_lwt_xmit_push_encap)
return true;
return false;
@ -5408,6 +5580,12 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
switch (func_id) {
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
#endif
default:
return sk_filter_func_proto(func_id, prog);
}
@ -5479,6 +5657,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
@ -5496,6 +5676,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@ -5672,7 +5854,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_lwt_push_encap:
return &bpf_lwt_push_encap_proto;
return &bpf_lwt_in_push_encap_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
@ -5708,6 +5890,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
case BPF_FUNC_lwt_push_encap:
return &bpf_lwt_xmit_push_encap_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
@ -5766,6 +5950,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
case offsetof(struct __sk_buff, sk):
if (type == BPF_WRITE || size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@ -5937,31 +6126,44 @@ full_access:
return true;
}
static bool __sock_filter_check_size(int off, int size,
bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
switch (off) {
case bpf_ctx_range(struct bpf_sock, src_ip4):
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
case bpf_ctx_range_till(struct bpf_sock, type, priority):
return false;
default:
return bpf_sock_is_valid_access(off, size, type, info);
}
return size == size_default;
}
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
if (off % size != 0)
return false;
if (!__sock_filter_check_size(off, size, info))
return false;
return true;
switch (off) {
case offsetof(struct bpf_sock, state):
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case bpf_ctx_range(struct bpf_sock, src_ip4):
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
case bpf_ctx_range(struct bpf_sock, dst_ip4):
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
}
return size == size_default;
}
static bool sock_filter_is_valid_access(int off, int size,
@ -6750,6 +6952,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, pkt_len);
*target_size = 4;
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
break;
case offsetof(struct __sk_buff, sk):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, sk));
break;
}
return insn - insn_buf;
@ -6798,24 +7007,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_family));
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_family),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_family,
FIELD_SIZEOF(struct sock_common,
skc_family),
target_size));
break;
case offsetof(struct bpf_sock, type):
BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
*target_size = 2;
break;
case offsetof(struct bpf_sock, protocol):
BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, __sk_flags_offset));
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
*target_size = 1;
break;
case offsetof(struct bpf_sock, src_ip4):
@ -6827,6 +7044,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
case offsetof(struct bpf_sock, dst_ip4):
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_daddr,
FIELD_SIZEOF(struct sock_common,
skc_daddr),
target_size));
break;
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
off = si->off;
@ -6845,6 +7071,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
#endif
break;
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
off = si->off;
off -= offsetof(struct bpf_sock, dst_ip6[0]);
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_v6_daddr.s6_addr32[0],
FIELD_SIZEOF(struct sock_common,
skc_v6_daddr.s6_addr32[0]),
target_size) + off);
#else
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
*target_size = 4;
#endif
break;
case offsetof(struct bpf_sock, src_port):
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@ -6854,6 +7097,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
skc_num),
target_size));
break;
case offsetof(struct bpf_sock, dst_port):
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_dport,
FIELD_SIZEOF(struct sock_common,
skc_dport),
target_size));
break;
case offsetof(struct bpf_sock, state):
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_state),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_state,
FIELD_SIZEOF(struct sock_common,
skc_state),
target_size));
break;
}
return insn - insn_buf;
@ -7101,6 +7364,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn = insn_buf;
int off;
/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, \
is_fullsock)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, sk),\
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
OBJ_FIELD), \
si->dst_reg, si->dst_reg, \
offsetof(OBJ, OBJ_FIELD)); \
} while (0)
#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
/* Helper macro for adding write access to tcp_sock or sock fields.
* The macro is called with two registers, dst_reg which contains a pointer
* to ctx (context) and src_reg which contains the value that should be
* stored. However, we need an additional register since we cannot overwrite
* dst_reg because it may be used later in the program.
* Instead we "borrow" one of the other register. We first save its value
* into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
* it at the end of the macro.
*/
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
int reg = BPF_REG_9; \
BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
is_fullsock)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, sk),\
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
reg, si->src_reg, \
offsetof(OBJ, OBJ_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
} while (0)
#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
do { \
if (TYPE == BPF_WRITE) \
SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
else \
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
SOCK_OPS_GET_TCP_SOCK_FIELD);
if (insn > insn_buf)
return insn - insn_buf;
switch (si->off) {
case offsetof(struct bpf_sock_ops, op) ...
offsetof(struct bpf_sock_ops, replylong[3]):
@ -7258,175 +7600,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
FIELD_SIZEOF(struct minmax_sample, t));
break;
/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, \
is_fullsock)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, sk),\
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
OBJ_FIELD), \
si->dst_reg, si->dst_reg, \
offsetof(OBJ, OBJ_FIELD)); \
} while (0)
/* Helper macro for adding write access to tcp_sock or sock fields.
* The macro is called with two registers, dst_reg which contains a pointer
* to ctx (context) and src_reg which contains the value that should be
* stored. However, we need an additional register since we cannot overwrite
* dst_reg because it may be used later in the program.
* Instead we "borrow" one of the other register. We first save its value
* into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
* it at the end of the macro.
*/
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
int reg = BPF_REG_9; \
BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
is_fullsock)); \
*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, sk),\
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
reg, si->src_reg, \
offsetof(OBJ, OBJ_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
} while (0)
#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
do { \
if (TYPE == BPF_WRITE) \
SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
else \
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
case offsetof(struct bpf_sock_ops, snd_cwnd):
SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, srtt_us):
SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, snd_ssthresh):
SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, rcv_nxt):
SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, snd_nxt):
SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, snd_una):
SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, mss_cache):
SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, ecn_flags):
SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, rate_delivered):
SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, rate_interval_us):
SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, packets_out):
SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, retrans_out):
SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, total_retrans):
SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, segs_in):
SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, data_segs_in):
SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, segs_out):
SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, data_segs_out):
SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, lost_out):
SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, sacked_out):
SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, sk_txhash):
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
break;
case offsetof(struct bpf_sock_ops, bytes_received):
SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, bytes_acked):
SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
break;
}
return insn - insn_buf;
}

View File

@ -16,6 +16,8 @@
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
#include <net/ip6_route.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@ -55,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
switch (ret) {
case BPF_OK:
case BPF_LWT_REROUTE:
break;
case BPF_REDIRECT:
@ -87,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
return ret;
}
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
int err = -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, skb_dst(skb)->dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
err = ipv6_stub->ipv6_route_input(skb);
} else {
err = -EAFNOSUPPORT;
}
if (err)
goto err;
return dst_input(skb);
err:
kfree_skb(skb);
return err;
}
static int bpf_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@ -98,11 +125,11 @@ static int bpf_input(struct sk_buff *skb)
ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
if (ret < 0)
return ret;
if (ret == BPF_LWT_REROUTE)
return bpf_lwt_input_reroute(skb);
}
if (unlikely(!dst->lwtstate->orig_input)) {
pr_warn_once("orig_input not set on dst for prog %s\n",
bpf->out.name);
kfree_skb(skb);
return -EINVAL;
}
@ -147,6 +174,102 @@ static int xmit_check_hhlen(struct sk_buff *skb)
return 0;
}
static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
{
struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
int oif = l3mdev ? l3mdev->ifindex : 0;
struct dst_entry *dst = NULL;
int err = -EAFNOSUPPORT;
struct sock *sk;
struct net *net;
bool ipv4;
if (skb->protocol == htons(ETH_P_IP))
ipv4 = true;
else if (skb->protocol == htons(ETH_P_IPV6))
ipv4 = false;
else
goto err;
sk = sk_to_full_sk(skb->sk);
if (sk) {
if (sk->sk_bound_dev_if)
oif = sk->sk_bound_dev_if;
net = sock_net(sk);
} else {
net = dev_net(skb_dst(skb)->dev);
}
if (ipv4) {
struct iphdr *iph = ip_hdr(skb);
struct flowi4 fl4 = {};
struct rtable *rt;
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
fl4.saddr = iph->saddr;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto err;
}
dst = &rt->dst;
} else {
struct ipv6hdr *iph6 = ipv6_hdr(skb);
struct flowi6 fl6 = {};
fl6.flowi6_oif = oif;
fl6.flowi6_mark = skb->mark;
fl6.flowi6_uid = sock_net_uid(net, sk);
fl6.flowlabel = ip6_flowinfo(iph6);
fl6.flowi6_proto = iph6->nexthdr;
fl6.daddr = iph6->daddr;
fl6.saddr = iph6->saddr;
err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
if (unlikely(err))
goto err;
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err;
}
}
if (unlikely(dst->error)) {
err = dst->error;
dst_release(dst);
goto err;
}
/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
* was done for the previous dst, so we are doing it here again, in
* case the new dst needs much more space. The call below is a noop
* if there is enough header space in skb.
*/
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
goto err;
skb_dst_drop(skb);
skb_dst_set(skb, dst);
err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
if (unlikely(err))
goto err;
/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
return LWTUNNEL_XMIT_DONE;
err:
kfree_skb(skb);
return err;
}
static int bpf_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@ -154,11 +277,20 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
__be16 proto = skb->protocol;
int ret;
ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
switch (ret) {
case BPF_OK:
/* If the header changed, e.g. via bpf_lwt_push_encap,
* BPF_LWT_REROUTE below should have been used if the
* protocol was also changed.
*/
if (skb->protocol != proto) {
kfree_skb(skb);
return -EINVAL;
}
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
@ -169,6 +301,8 @@ static int bpf_xmit(struct sk_buff *skb)
return LWTUNNEL_XMIT_CONTINUE;
case BPF_REDIRECT:
return LWTUNNEL_XMIT_DONE;
case BPF_LWT_REROUTE:
return bpf_lwt_xmit_reroute(skb);
default:
return ret;
}
@ -390,6 +524,133 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
.owner = THIS_MODULE,
};
static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
int encap_len)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
gso_type |= SKB_GSO_DODGY;
shinfo->gso_type |= gso_type;
skb_decrease_gso_size(shinfo, encap_len);
shinfo->gso_segs = 0;
return 0;
}
static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
{
int next_hdr_offset;
void *next_hdr;
__u8 protocol;
/* SCTP and UDP_L4 gso need more nuanced handling than what
* handle_gso_type() does above: skb_decrease_gso_size() is not enough.
* So at the moment only TCP GSO packets are let through.
*/
if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
return -ENOTSUPP;
if (ipv4) {
protocol = ip_hdr(skb)->protocol;
next_hdr_offset = sizeof(struct iphdr);
next_hdr = skb_network_header(skb) + next_hdr_offset;
} else {
protocol = ipv6_hdr(skb)->nexthdr;
next_hdr_offset = sizeof(struct ipv6hdr);
next_hdr = skb_network_header(skb) + next_hdr_offset;
}
switch (protocol) {
case IPPROTO_GRE:
next_hdr_offset += sizeof(struct gre_base_hdr);
if (next_hdr_offset > encap_len)
return -EINVAL;
if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
encap_len);
return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
case IPPROTO_UDP:
next_hdr_offset += sizeof(struct udphdr);
if (next_hdr_offset > encap_len)
return -EINVAL;
if (((struct udphdr *)next_hdr)->check)
return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
encap_len);
return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
case IPPROTO_IP:
case IPPROTO_IPV6:
if (ipv4)
return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
else
return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
default:
return -EPROTONOSUPPORT;
}
}
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
{
struct iphdr *iph;
bool ipv4;
int err;
if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
return -EINVAL;
/* validate protocol and length */
iph = (struct iphdr *)hdr;
if (iph->version == 4) {
ipv4 = true;
if (unlikely(len < iph->ihl * 4))
return -EINVAL;
} else if (iph->version == 6) {
ipv4 = false;
if (unlikely(len < sizeof(struct ipv6hdr)))
return -EINVAL;
} else {
return -EINVAL;
}
if (ingress)
err = skb_cow_head(skb, len + skb->mac_len);
else
err = skb_cow_head(skb,
len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
if (unlikely(err))
return err;
/* push the encap headers and fix pointers */
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
skb_push(skb, len);
if (ingress)
skb_postpush_rcsum(skb, iph, len);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), hdr, len);
bpf_compute_data_pointers(skb);
skb_clear_hash(skb);
if (ipv4) {
skb->protocol = htons(ETH_P_IP);
iph = ip_hdr(skb);
if (!iph->check)
iph->check = ip_fast_csum((unsigned char *)iph,
iph->ihl);
} else {
skb->protocol = htons(ETH_P_IPV6);
}
if (skb_is_gso(skb))
return handle_gso_encap(skb, ipv4, len);
return 0;
}
static int __init bpf_lwt_init(void)
{
return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);

View File

@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
return -EAFNOSUPPORT;
}
static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
{
return -EAFNOSUPPORT;
}
static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
{
return NULL;
@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,

View File

@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
static int ipv6_route_input(struct sk_buff *skb)
{
ip6_route_input(skb);
return skb_dst(skb)->error;
}
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
.ipv6_dst_lookup = ip6_dst_lookup,
.ipv6_route_input = ipv6_route_input,
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,

View File

@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** }
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@ -34,6 +34,8 @@ SYNOPSIS
*NET-COMMANDS* := { **show** | **list** | **help** }
*FEATURE-COMMANDS* := { **probe** | **help** }
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects

View File

@ -2016,6 +2016,19 @@ union bpf_attr {
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
* **BPF_LWT_ENCAP_IP**
* IP encapsulation (GRE/GUE/IPIP/etc). The outer header
* must be IPv4 or IPv6, followed by zero or more
* additional headers, up to LWT_BPF_MAX_HEADROOM total
* bytes in all prepended headers. Please note that
* if skb_is_gso(skb) is true, no more than two headers
* can be prepended, and the inner header, if present,
* should be either GRE or UDP/GUE.
*
* BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of
* type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
* by bpf programs of types BPF_PROG_TYPE_LWT_IN and
* BPF_PROG_TYPE_LWT_XMIT.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@ -2329,6 +2342,23 @@ union bpf_attr {
* "**y**".
* Return
* 0
*
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_sock** pointer such
* that all the fields in bpf_sock can be accessed.
* Return
* A **struct bpf_sock** pointer on success, or NULL in
* case of failure.
*
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_tcp_sock** pointer from a
* **struct bpf_sock** pointer.
*
* Return
* A **struct bpf_tcp_sock** pointer on success, or NULL in
* case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2425,7 +2455,9 @@ union bpf_attr {
FN(msg_pop_data), \
FN(rc_pointer_rel), \
FN(spin_lock), \
FN(spin_unlock),
FN(spin_unlock), \
FN(sk_fullsock), \
FN(tcp_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -2498,7 +2530,8 @@ enum bpf_hdr_start_off {
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
BPF_LWT_ENCAP_SEG6_INLINE,
BPF_LWT_ENCAP_IP,
};
#define __bpf_md_ptr(type, name) \
@ -2545,6 +2578,7 @@ struct __sk_buff {
__u64 tstamp;
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
};
struct bpf_tunnel_key {
@ -2586,7 +2620,15 @@ enum bpf_ret_code {
BPF_DROP = 2,
/* 3-6 reserved */
BPF_REDIRECT = 7,
/* >127 are reserved for prog type specific return codes */
/* >127 are reserved for prog type specific return codes.
*
* BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
* BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
* changed and should be routed based on its new L3 header.
* (This is an L3 redirect, as opposed to L2 redirect
* represented by BPF_REDIRECT above).
*/
BPF_LWT_REROUTE = 128,
};
struct bpf_sock {
@ -2596,14 +2638,52 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
*/
/* IP address also allows 1 and 2 bytes access */
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
__u32 dst_port; /* network byte order */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
};
struct bpf_tcp_sock {
__u32 snd_cwnd; /* Sending congestion window */
__u32 srtt_us; /* smoothed round trip time << 3 in usecs */
__u32 rtt_min;
__u32 snd_ssthresh; /* Slow start size threshold */
__u32 rcv_nxt; /* What we want to receive next */
__u32 snd_nxt; /* Next sequence we send */
__u32 snd_una; /* First byte we want an ack for */
__u32 mss_cache; /* Cached effective mss, not including SACKS */
__u32 ecn_flags; /* ECN status bits. */
__u32 rate_delivered; /* saved rate sample: packets delivered */
__u32 rate_interval_us; /* saved rate sample: time elapsed */
__u32 packets_out; /* Packets which are "in flight" */
__u32 retrans_out; /* Retransmitted packets out */
__u32 total_retrans; /* Total retransmits for entire connection */
__u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
* total number of segments in.
*/
__u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
* total number of data segments in.
*/
__u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
* The total number of segments sent.
*/
__u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
* total number of data segments sent.
*/
__u32 lost_out; /* Lost packets */
__u32 sacked_out; /* SACK'd packets */
__u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
* sum(delta(rcv_nxt)), or how many bytes
* were acked.
*/
__u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
};
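As a quick illustration of the two helpers documented above, a cgroup_skb program can follow skb->sk through bpf_sk_fullsock() to bpf_tcp_sock(); a sketch assuming the selftests' bpf_helpers.h, with a toy drop policy chosen only to make the reads useful:

/* Sketch: the NULL-check chain the verifier expects for the new helpers. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("cgroup_skb/egress")
int read_tcp_fields(struct __sk_buff *skb)
{
	struct bpf_tcp_sock *tp;
	struct bpf_sock *sk;

	sk = skb->sk; /* may be NULL, or not a full socket */
	if (!sk)
		return 1;

	sk = bpf_sk_fullsock(sk); /* NULL unless sk is a full socket */
	if (!sk)
		return 1;

	tp = bpf_tcp_sock(sk); /* NULL for non-TCP sockets */
	if (!tp)
		return 1;

	/* toy policy for the example: drop egress traffic on sockets that
	 * currently have retransmitted segments outstanding
	 */
	if (tp->retrans_out)
		return 0;

	return 1;
}

char _license[] SEC("license") = "GPL";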

View File

@ -925,6 +925,7 @@ enum {
enum {
LINK_XSTATS_TYPE_UNSPEC,
LINK_XSTATS_TYPE_BRIDGE,
LINK_XSTATS_TYPE_BOND,
__LINK_XSTATS_TYPE_MAX
};
#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)

View File

@ -22,6 +22,7 @@
*/
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <unistd.h>
#include <asm/unistd.h>
@ -214,23 +215,35 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
{
void *finfo = NULL, *linfo = NULL;
union bpf_attr attr;
__u32 log_level;
__u32 name_len;
int fd;
if (!load_attr)
if (!load_attr || !log_buf != !log_buf_sz)
return -EINVAL;
log_level = load_attr->log_level;
if (log_level > 2 || (log_level && !log_buf))
return -EINVAL;
name_len = load_attr->name ? strlen(load_attr->name) : 0;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
attr.insn_cnt = (__u32)load_attr->insns_cnt;
attr.insns = ptr_to_u64(load_attr->insns);
attr.license = ptr_to_u64(load_attr->license);
attr.log_buf = ptr_to_u64(NULL);
attr.log_size = 0;
attr.log_level = 0;
attr.log_level = log_level;
if (log_level) {
attr.log_buf = ptr_to_u64(log_buf);
attr.log_size = log_buf_sz;
} else {
attr.log_buf = ptr_to_u64(NULL);
attr.log_size = 0;
}
attr.kern_version = load_attr->kern_version;
attr.prog_ifindex = load_attr->prog_ifindex;
attr.prog_btf_fd = load_attr->prog_btf_fd;
@ -286,7 +299,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
goto done;
}
if (!log_buf || !log_buf_sz)
if (log_level || !log_buf)
goto done;
/* Try again with log */
@ -327,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.prog_type = type;
attr.insn_cnt = (__u32)insns_cnt;
attr.insns = ptr_to_u64(insns);
@ -347,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
@ -360,7 +373,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
@ -372,7 +385,7 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
@ -385,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.value = ptr_to_u64(value);
@ -397,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
@ -408,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_fd = fd;
attr.key = ptr_to_u64(key);
attr.next_key = ptr_to_u64(next_key);
@ -420,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
attr.bpf_fd = fd;
@ -431,7 +444,7 @@ int bpf_obj_get(const char *pathname)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.pathname = ptr_to_u64((void *)pathname);
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
@ -442,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
@ -455,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_type = type;
@ -466,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
@ -480,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
union bpf_attr attr;
int ret;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.query.target_fd = target_fd;
attr.query.attach_type = type;
attr.query.query_flags = query_flags;
@ -501,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
union bpf_attr attr;
int ret;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = prog_fd;
attr.test.data_in = ptr_to_u64(data);
attr.test.data_out = ptr_to_u64(data_out);
@ -526,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
if (!test_attr->data_out && test_attr->data_size_out > 0)
return -EINVAL;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.test.prog_fd = test_attr->prog_fd;
attr.test.data_in = ptr_to_u64(test_attr->data_in);
attr.test.data_out = ptr_to_u64(test_attr->data_out);
@ -546,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
union bpf_attr attr;
int err;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.start_id = start_id;
err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
@ -561,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
union bpf_attr attr;
int err;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.start_id = start_id;
err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
@ -575,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.prog_id = id;
return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
@ -585,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.map_id = id;
return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
@ -595,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.btf_id = id;
return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
@ -606,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
union bpf_attr attr;
int err;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.info.bpf_fd = prog_fd;
attr.info.info_len = *info_len;
attr.info.info = ptr_to_u64(info);
@ -622,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
memset(&attr, 0, sizeof(attr));
attr.raw_tracepoint.name = ptr_to_u64(name);
attr.raw_tracepoint.prog_fd = prog_fd;

View File

@ -85,6 +85,7 @@ struct bpf_load_program_attr {
__u32 line_info_rec_size;
const void *line_info;
__u32 line_info_cnt;
__u32 log_level;
};
/* Flags to direct loading requirements */
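The new log_level field can be combined with an explicit buffer to get the verifier log on the very first load attempt instead of relying on the retry-with-log fallback; a sketch (the buffer size and level are arbitrary, insns/insns_cnt come from the caller):

/* Sketch: request a verbose verifier log up front. */
#include <stdio.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int load_with_verbose_log(const struct bpf_insn *insns, size_t insns_cnt)
{
	static char log_buf[65536]; /* arbitrary size for the example */
	struct bpf_load_program_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = insns;
	attr.insns_cnt = insns_cnt;
	attr.license = "GPL";
	attr.log_level = 2; /* 0 disables, 1 basic, 2 verbose; >2 is -EINVAL */

	fd = bpf_load_program_xattr(&attr, log_buf, sizeof(log_buf));
	if (fd < 0)
		fprintf(stderr, "verifier log:\n%s\n", log_buf);
	return fd;
}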

View File

@ -16,7 +16,8 @@
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
#define BTF_MAX_NR_TYPES 65535
#define BTF_MAX_NR_TYPES 0x7fffffff
#define BTF_MAX_STR_OFFSET 0x7fffffff
#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \
((k) == BTF_KIND_VOLATILE) || \
@ -41,9 +42,8 @@ struct btf {
struct btf_ext_info {
/*
* info points to a deep copy of the individual info section
* (e.g. func_info and line_info) from the .BTF.ext.
* It does not include the __u32 rec_size.
* info points to the individual info section (e.g. func_info and
* line_info) from the .BTF.ext. It does not include the __u32 rec_size.
*/
void *info;
__u32 rec_size;
@ -51,8 +51,13 @@ struct btf_ext_info {
};
struct btf_ext {
union {
struct btf_ext_header *hdr;
void *data;
};
struct btf_ext_info func_info;
struct btf_ext_info line_info;
__u32 data_size;
};
struct btf_ext_info_sec {
@ -171,7 +176,7 @@ static int btf_parse_str_sec(struct btf *btf)
const char *start = btf->nohdr_data + hdr->str_off;
const char *end = start + btf->hdr->str_len;
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
start[0] || end[-1]) {
pr_debug("Invalid BTF string section\n");
return -EINVAL;
@ -366,8 +371,6 @@ void btf__free(struct btf *btf)
struct btf *btf__new(__u8 *data, __u32 size)
{
__u32 log_buf_size = 0;
char *log_buf = NULL;
struct btf *btf;
int err;
@ -377,15 +380,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
btf->fd = -1;
log_buf = malloc(BPF_LOG_BUF_SIZE);
if (!log_buf) {
err = -ENOMEM;
goto done;
}
*log_buf = 0;
log_buf_size = BPF_LOG_BUF_SIZE;
btf->data = malloc(size);
if (!btf->data) {
err = -ENOMEM;
@ -395,17 +389,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
memcpy(btf->data, data, size);
btf->data_size = size;
btf->fd = bpf_load_btf(btf->data, btf->data_size,
log_buf, log_buf_size, false);
if (btf->fd == -1) {
err = -errno;
pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
if (log_buf && *log_buf)
pr_warning("%s\n", log_buf);
goto done;
}
err = btf_parse_hdr(btf);
if (err)
goto done;
@ -417,8 +400,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
err = btf_parse_type_sec(btf);
done:
free(log_buf);
if (err) {
btf__free(btf);
return ERR_PTR(err);
@ -427,16 +408,45 @@ done:
return btf;
}
int btf__load(struct btf *btf)
{
__u32 log_buf_size = BPF_LOG_BUF_SIZE;
char *log_buf = NULL;
int err = 0;
if (btf->fd >= 0)
return -EEXIST;
log_buf = malloc(log_buf_size);
if (!log_buf)
return -ENOMEM;
*log_buf = 0;
btf->fd = bpf_load_btf(btf->data, btf->data_size,
log_buf, log_buf_size, false);
if (btf->fd < 0) {
err = -errno;
pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
if (*log_buf)
pr_warning("%s\n", log_buf);
goto done;
}
done:
free(log_buf);
return err;
}
int btf__fd(const struct btf *btf)
{
return btf->fd;
}
void btf__get_strings(const struct btf *btf, const char **strings,
__u32 *str_len)
const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
{
*strings = btf->strings;
*str_len = btf->hdr->str_len;
*size = btf->data_size;
return btf->data;
}
const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@ -474,7 +484,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
goto exit_free;
}
bzero(ptr, last_size);
memset(ptr, 0, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
@ -488,7 +498,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
goto exit_free;
}
ptr = temp_ptr;
bzero(ptr, last_size);
memset(ptr, 0, last_size);
btf_info.btf = ptr_to_u64(ptr);
err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
}
@ -583,7 +593,7 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
return 0;
}
struct btf_ext_sec_copy_param {
struct btf_ext_sec_setup_param {
__u32 off;
__u32 len;
__u32 min_rec_size;
@ -591,20 +601,14 @@ struct btf_ext_sec_copy_param {
const char *desc;
};
static int btf_ext_copy_info(struct btf_ext *btf_ext,
__u8 *data, __u32 data_size,
struct btf_ext_sec_copy_param *ext_sec)
static int btf_ext_setup_info(struct btf_ext *btf_ext,
struct btf_ext_sec_setup_param *ext_sec)
{
const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
const struct btf_ext_info_sec *sinfo;
struct btf_ext_info *ext_info;
__u32 info_left, record_size;
/* The start of the info sec (including the __u32 record_size). */
const void *info;
/* data and data_size do not include btf_ext_header from now on */
data = data + hdr->hdr_len;
data_size -= hdr->hdr_len;
void *info;
if (ext_sec->off & 0x03) {
pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
@ -612,16 +616,15 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
return -EINVAL;
}
if (data_size < ext_sec->off ||
ext_sec->len > data_size - ext_sec->off) {
info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
info_left = ext_sec->len;
if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
ext_sec->desc, ext_sec->off, ext_sec->len);
ext_sec->desc, ext_sec->off, ext_sec->len);
return -EINVAL;
}
info = data + ext_sec->off;
info_left = ext_sec->len;
/* At least a record size */
if (info_left < sizeof(__u32)) {
pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
@ -633,7 +636,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
if (record_size < ext_sec->min_rec_size ||
record_size & 0x03) {
pr_debug("%s section in .BTF.ext has invalid record size %u\n",
ext_sec->desc, record_size);
ext_sec->desc, record_size);
return -EINVAL;
}
@ -679,42 +682,35 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
ext_info = ext_sec->ext_info;
ext_info->len = ext_sec->len - sizeof(__u32);
ext_info->rec_size = record_size;
ext_info->info = malloc(ext_info->len);
if (!ext_info->info)
return -ENOMEM;
memcpy(ext_info->info, info + sizeof(__u32), ext_info->len);
ext_info->info = info + sizeof(__u32);
return 0;
}
static int btf_ext_copy_func_info(struct btf_ext *btf_ext,
__u8 *data, __u32 data_size)
static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
{
const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
struct btf_ext_sec_copy_param param = {
.off = hdr->func_info_off,
.len = hdr->func_info_len,
struct btf_ext_sec_setup_param param = {
.off = btf_ext->hdr->func_info_off,
.len = btf_ext->hdr->func_info_len,
.min_rec_size = sizeof(struct bpf_func_info_min),
.ext_info = &btf_ext->func_info,
.desc = "func_info"
};
return btf_ext_copy_info(btf_ext, data, data_size, &param);
return btf_ext_setup_info(btf_ext, &param);
}
static int btf_ext_copy_line_info(struct btf_ext *btf_ext,
__u8 *data, __u32 data_size)
static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
{
const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
struct btf_ext_sec_copy_param param = {
.off = hdr->line_info_off,
.len = hdr->line_info_len,
struct btf_ext_sec_setup_param param = {
.off = btf_ext->hdr->line_info_off,
.len = btf_ext->hdr->line_info_len,
.min_rec_size = sizeof(struct bpf_line_info_min),
.ext_info = &btf_ext->line_info,
.desc = "line_info",
};
return btf_ext_copy_info(btf_ext, data, data_size, &param);
return btf_ext_setup_info(btf_ext, &param);
}
static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
@ -754,9 +750,7 @@ void btf_ext__free(struct btf_ext *btf_ext)
{
if (!btf_ext)
return;
free(btf_ext->func_info.info);
free(btf_ext->line_info.info);
free(btf_ext->data);
free(btf_ext);
}
@ -773,13 +767,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
if (!btf_ext)
return ERR_PTR(-ENOMEM);
err = btf_ext_copy_func_info(btf_ext, data, size);
if (err) {
btf_ext__free(btf_ext);
return ERR_PTR(err);
btf_ext->data_size = size;
btf_ext->data = malloc(size);
if (!btf_ext->data) {
err = -ENOMEM;
goto done;
}
memcpy(btf_ext->data, data, size);
err = btf_ext_copy_line_info(btf_ext, data, size);
err = btf_ext_setup_func_info(btf_ext);
if (err)
goto done;
err = btf_ext_setup_line_info(btf_ext);
if (err)
goto done;
done:
if (err) {
btf_ext__free(btf_ext);
return ERR_PTR(err);
@ -788,6 +792,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
return btf_ext;
}
const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
{
*size = btf_ext->data_size;
return btf_ext->data;
}
static int btf_ext_reloc_info(const struct btf *btf,
const struct btf_ext_info *ext_info,
const char *sec_name, __u32 insns_cnt,
@ -836,7 +846,8 @@ static int btf_ext_reloc_info(const struct btf *btf,
return -ENOENT;
}
int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext,
int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **func_info, __u32 *cnt)
{
@ -844,7 +855,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex
insns_cnt, func_info, cnt);
}
int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext,
int btf_ext__reloc_line_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,
void **line_info, __u32 *cnt)
{
@ -1871,7 +1883,7 @@ static int btf_dedup_prim_types(struct btf_dedup *d)
*/
static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
{
return d->map[type_id] <= BTF_MAX_TYPE;
return d->map[type_id] <= BTF_MAX_NR_TYPES;
}
/*
@ -2022,7 +2034,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
canon_id = resolve_fwd_id(d, canon_id);
hypot_type_id = d->hypot_map[canon_id];
if (hypot_type_id <= BTF_MAX_TYPE)
if (hypot_type_id <= BTF_MAX_NR_TYPES)
return hypot_type_id == cand_id;
if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
@ -2241,7 +2253,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
__u32 h;
/* already deduped or is in process of deduping (loop detected) */
if (d->map[type_id] <= BTF_MAX_TYPE)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return 0;
t = d->btf->types[type_id];
@ -2318,7 +2330,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
if (d->map[type_id] == BTF_IN_PROGRESS_ID)
return -ELOOP;
if (d->map[type_id] <= BTF_MAX_TYPE)
if (d->map[type_id] <= BTF_MAX_NR_TYPES)
return resolve_type_id(d, type_id);
t = d->btf->types[type_id];
@ -2498,7 +2510,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
resolved_type_id = resolve_type_id(d, type_id);
new_type_id = d->hypot_map[resolved_type_id];
if (new_type_id > BTF_MAX_TYPE)
if (new_type_id > BTF_MAX_NR_TYPES)
return -EINVAL;
return new_type_id;
}
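Since btf__new() no longer loads the parsed BTF into the kernel, callers now do that explicitly with btf__load(), and the raw bytes (including the string section that btf__get_strings() used to expose) come from btf__get_raw_data(); a sketch of the updated calling sequence:

/* Sketch: parse, then explicitly load, then fetch the raw BTF bytes. */
#include <bpf/btf.h>
#include <bpf/libbpf.h>

static int parse_and_load_btf(__u8 *data, __u32 size)
{
	const void *raw;
	__u32 raw_size;
	struct btf *btf;
	int err;

	btf = btf__new(data, size); /* parse only, no BPF_BTF_LOAD here */
	if (libbpf_get_error(btf))
		return -1;

	err = btf__load(btf); /* now hand it to the kernel */
	if (err)
		goto out;

	raw = btf__get_raw_data(btf, &raw_size); /* replaces btf__get_strings() */
	if (!raw)
		err = -1;
	/* ... use btf__fd(btf), raw, raw_size ... */
out:
	btf__free(btf);
	return err;
}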

View File

@ -57,6 +57,7 @@ struct btf_ext_header {
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
@ -65,8 +66,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings,
__u32 *str_len);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
@ -76,6 +76,8 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext* btf_ext,
__u32 *size);
LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
const char *sec_name, __u32 insns_cnt,

View File

@ -307,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
return -EINVAL;
}
bzero(prog, sizeof(*prog));
memset(prog, 0, sizeof(*prog));
prog->section_name = strdup(section_name);
if (!prog->section_name) {
@ -835,7 +835,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
obj->efile.maps_shndx = idx;
else if (strcmp(name, BTF_ELF_SEC) == 0) {
obj->btf = btf__new(data->d_buf, data->d_size);
if (IS_ERR(obj->btf)) {
if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
BTF_ELF_SEC, PTR_ERR(obj->btf));
obj->btf = NULL;
@ -1113,6 +1113,20 @@ err_free_new_name:
return -errno;
}
int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
{
if (!map || !max_entries)
return -EINVAL;
/* If map already created, its attributes can't be changed. */
if (map->fd >= 0)
return -EBUSY;
map->def.max_entries = max_entries;
return 0;
}
static int
bpf_object__probe_name(struct bpf_object *obj)
{
@ -1576,7 +1590,7 @@ bpf_program__load(struct bpf_program *prog,
struct bpf_prog_prep_result result;
bpf_program_prep_t preprocessor = prog->preprocessor;
bzero(&result, sizeof(result));
memset(&result, 0, sizeof(result));
err = preprocessor(prog, i, prog->insns,
prog->insns_cnt, &result);
if (err) {
@ -2317,6 +2331,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
return obj ? obj->kern_version : 0;
}
struct btf *bpf_object__btf(struct bpf_object *obj)
{
return obj ? obj->btf : NULL;
}
int bpf_object__btf_fd(const struct bpf_object *obj)
{
return obj->btf ? btf__fd(obj->btf) : -1;

View File

@ -89,6 +89,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj);
LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
struct btf;
LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj);
LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
LIBBPF_API struct bpf_program *
@ -294,6 +297,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv);
LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
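Both additions are meant to be used around the open/load cycle: bpf_map__resize() only works before bpf_object__load(), while bpf_object__btf() is useful once the object (and its BTF) has been parsed. A sketch with placeholder object and map names:

/* Sketch: resize a map before load, inspect the object's BTF after open. */
#include <stdio.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>

static int open_resize_and_load(void)
{
	struct bpf_object *obj;
	struct bpf_map *map;
	struct btf *btf;
	int err = 0;

	obj = bpf_object__open("prog.o"); /* placeholder path */
	if (libbpf_get_error(obj))
		return -1;

	map = bpf_object__find_map_by_name(obj, "my_map"); /* placeholder name */
	if (map) {
		err = bpf_map__resize(map, 4096); /* only valid before load */
		if (err)
			goto out;
	}

	err = bpf_object__load(obj);
	if (err)
		goto out;

	btf = bpf_object__btf(obj); /* NULL if the object carries no BTF */
	if (btf)
		printf("BTF fd: %d\n", btf__fd(btf));
out:
	bpf_object__close(obj);
	return err;
}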

View File

@ -130,15 +130,19 @@ LIBBPF_0.0.2 {
bpf_probe_helper;
bpf_probe_map_type;
bpf_probe_prog_type;
bpf_map__resize;
bpf_map_lookup_elem_flags;
bpf_object__btf;
bpf_object__find_map_fd_by_name;
bpf_get_link_xdp_id;
btf__dedup;
btf__get_map_kv_tids;
btf__get_nr_types;
btf__get_strings;
btf__get_raw_data;
btf__load;
btf_ext__free;
btf_ext__func_info_rec_size;
btf_ext__get_raw_data;
btf_ext__line_info_rec_size;
btf_ext__new;
btf_ext__reloc_func_info;

View File

@ -29,3 +29,4 @@ test_netcnt
test_section_names
test_tcpnotify_user
test_libbpf
alu32

View File

@ -23,42 +23,19 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user
test_netcnt test_tcpnotify_user test_sock_fields
BPF_OBJ_FILES = \
test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \
test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \
test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \
sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \
xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
# Objects are built with default compilation flags and with sub-register
# code-gen enabled.
BPF_OBJ_FILES_DUAL_COMPILE = \
test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \
test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \
test_obj_id.o test_pkt_md_access.o test_tracepoint.o \
test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \
test_stacktrace_build_id.o test_get_stack_rawtp.o \
test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \
test_queue_map.o test_stack_map.o
TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE)
# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor
# support which is the first version to contain both ALU32 and JMP32
# instructions.
# Also test sub-register code-gen if LLVM has eBPF v3 processor support which
# contains both ALU32 and JMP32 instructions.
SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
$(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
$(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \
$(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \
grep 'if w')
ifneq ($(SUBREG_CODEGEN),)
TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE))
TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES))
endif
# Order correspond to 'make run_tests' order
@ -73,7 +50,8 @@ TEST_PROGS := test_kmod.sh \
test_lirc_mode2.sh \
test_skb_cgroup_id.sh \
test_flow_dissector.sh \
test_xdp_vlan.sh
test_xdp_vlan.sh \
test_lwt_ip_encap.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@ -111,6 +89,7 @@ $(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
.PHONY: force
@ -188,7 +167,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \
$(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS)
$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32
$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \
$(ALU32_BUILD_DIR)/test_progs_32
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
@ -200,7 +180,7 @@ endif
# Have one program compiled without "-target bpf" to test whether libbpf loads
# it successfully
$(OUTPUT)/test_xdp.o: test_xdp.c
$(OUTPUT)/test_xdp.o: progs/test_xdp.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
@ -208,7 +188,7 @@ ifeq ($(DWARF2BTF),y)
$(BTF_PAHOLE) -J $@
endif
$(OUTPUT)/%.o: %.c
$(OUTPUT)/%.o: progs/%.c
$(CLANG) $(CLANG_FLAGS) \
-O2 -target bpf -emit-llvm -c $< -o - | \
$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@

View File

@ -176,6 +176,10 @@ static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
(void *) BPF_FUNC_spin_lock;
static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
(void *) BPF_FUNC_spin_unlock;
static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_sk_fullsock;
static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_tcp_sock;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -58,4 +58,13 @@ static inline unsigned int bpf_num_possible_cpus(void)
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifndef sizeof_field
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
#endif
#ifndef offsetofend
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
#endif
#endif /* __BPF_UTIL__ */
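These mirror the kernel's own helpers: offsetofend() yields the offset of the first byte past a member, which is handy for byte-range checks against fixed headers. A tiny sketch, assuming the selftests include path:

/* Sketch: offsetofend() as a compile-time bounds helper. */
#include <stddef.h>
#include <linux/ip.h>
#include "bpf_util.h"

_Static_assert(offsetofend(struct iphdr, saddr) == 16,
	       "IPv4 source address ends at byte offset 16");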

View File

@ -0,0 +1,85 @@
// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
struct grehdr {
__be16 flags;
__be16 protocol;
};
SEC("encap_gre")
int bpf_lwt_encap_gre(struct __sk_buff *skb)
{
struct encap_hdr {
struct iphdr iph;
struct grehdr greh;
} hdr;
int err;
memset(&hdr, 0, sizeof(struct encap_hdr));
hdr.iph.ihl = 5;
hdr.iph.version = 4;
hdr.iph.ttl = 0x40;
hdr.iph.protocol = 47; /* IPPROTO_GRE */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */
hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */
hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */
#else
#error "Fix your compiler's __BYTE_ORDER__?!"
#endif
hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr));
hdr.greh.protocol = skb->protocol;
err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
sizeof(struct encap_hdr));
if (err)
return BPF_DROP;
return BPF_LWT_REROUTE;
}
SEC("encap_gre6")
int bpf_lwt_encap_gre6(struct __sk_buff *skb)
{
struct encap_hdr {
struct ipv6hdr ip6hdr;
struct grehdr greh;
} hdr;
int err;
memset(&hdr, 0, sizeof(struct encap_hdr));
hdr.ip6hdr.version = 6;
hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr));
hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */
hdr.ip6hdr.hop_limit = 0x40;
/* fb01::1 */
hdr.ip6hdr.saddr.s6_addr[0] = 0xfb;
hdr.ip6hdr.saddr.s6_addr[1] = 1;
hdr.ip6hdr.saddr.s6_addr[15] = 1;
/* fb10::1 */
hdr.ip6hdr.daddr.s6_addr[0] = 0xfb;
hdr.ip6hdr.daddr.s6_addr[1] = 0x10;
hdr.ip6hdr.daddr.s6_addr[15] = 1;
hdr.greh.protocol = skb->protocol;
err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
sizeof(struct encap_hdr));
if (err)
return BPF_DROP;
return BPF_LWT_REROUTE;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,152 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <netinet/in.h>
#include <stdbool.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
enum bpf_array_idx {
SRV_IDX,
CLI_IDX,
__NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") addr_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct sockaddr_in6),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_sock),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") tcp_sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_tcp_sock),
.max_entries = __NR_BPF_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") linum_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
}
static void skcpy(struct bpf_sock *dst,
const struct bpf_sock *src)
{
dst->bound_dev_if = src->bound_dev_if;
dst->family = src->family;
dst->type = src->type;
dst->protocol = src->protocol;
dst->mark = src->mark;
dst->priority = src->priority;
dst->src_ip4 = src->src_ip4;
dst->src_ip6[0] = src->src_ip6[0];
dst->src_ip6[1] = src->src_ip6[1];
dst->src_ip6[2] = src->src_ip6[2];
dst->src_ip6[3] = src->src_ip6[3];
dst->src_port = src->src_port;
dst->dst_ip4 = src->dst_ip4;
dst->dst_ip6[0] = src->dst_ip6[0];
dst->dst_ip6[1] = src->dst_ip6[1];
dst->dst_ip6[2] = src->dst_ip6[2];
dst->dst_ip6[3] = src->dst_ip6[3];
dst->dst_port = src->dst_port;
dst->state = src->state;
}
static void tpcpy(struct bpf_tcp_sock *dst,
const struct bpf_tcp_sock *src)
{
dst->snd_cwnd = src->snd_cwnd;
dst->srtt_us = src->srtt_us;
dst->rtt_min = src->rtt_min;
dst->snd_ssthresh = src->snd_ssthresh;
dst->rcv_nxt = src->rcv_nxt;
dst->snd_nxt = src->snd_nxt;
dst->snd_una = src->snd_una;
dst->mss_cache = src->mss_cache;
dst->ecn_flags = src->ecn_flags;
dst->rate_delivered = src->rate_delivered;
dst->rate_interval_us = src->rate_interval_us;
dst->packets_out = src->packets_out;
dst->retrans_out = src->retrans_out;
dst->total_retrans = src->total_retrans;
dst->segs_in = src->segs_in;
dst->data_segs_in = src->data_segs_in;
dst->segs_out = src->segs_out;
dst->data_segs_out = src->data_segs_out;
dst->lost_out = src->lost_out;
dst->sacked_out = src->sacked_out;
dst->bytes_received = src->bytes_received;
dst->bytes_acked = src->bytes_acked;
}
#define RETURN { \
linum = __LINE__; \
bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \
return 1; \
}
SEC("cgroup_skb/egress")
int read_sock_fields(struct __sk_buff *skb)
{
__u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, idx0 = 0;
sk = skb->sk;
if (!sk || sk->state == 10)
RETURN;
sk = bpf_sk_fullsock(sk);
if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
!is_loopback6(sk->src_ip6))
RETURN;
tp = bpf_tcp_sock(sk);
if (!tp)
RETURN;
srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
if (!srv_sa6 || !cli_sa6)
RETURN;
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
idx = srv_idx;
else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
idx = cli_idx;
else
RETURN;
sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
if (!sk_ret || !tp_ret)
RETURN;
skcpy(sk_ret, sk);
tpcpy(tp_ret, tp);
RETURN;
}
char _license[] SEC("license") = "GPL";

View File

@ -5879,15 +5879,17 @@ static void dump_btf_strings(const char *strs, __u32 len)
static int do_test_dedup(unsigned int test_num)
{
const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
int err = 0, i;
__u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len;
void *raw_btf;
unsigned int raw_btf_size;
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
const struct btf_header *test_hdr, *expect_hdr;
struct btf *test_btf = NULL, *expect_btf = NULL;
const void *test_btf_data, *expect_btf_data;
const char *ret_test_next_str, *ret_expect_next_str;
const char *test_strs, *expect_strs;
const char *test_str_cur, *test_str_end;
const char *expect_str_cur, *expect_str_end;
unsigned int raw_btf_size;
void *raw_btf;
int err = 0, i;
fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
@ -5924,23 +5926,34 @@ static int do_test_dedup(unsigned int test_num)
goto done;
}
btf__get_strings(test_btf, &test_strs, &test_str_len);
btf__get_strings(expect_btf, &expect_strs, &expect_str_len);
if (CHECK(test_str_len != expect_str_len,
"test_str_len:%u != expect_str_len:%u",
test_str_len, expect_str_len)) {
test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
if (CHECK(test_btf_size != expect_btf_size,
"test_btf_size:%u != expect_btf_size:%u",
test_btf_size, expect_btf_size)) {
err = -1;
goto done;
}
test_hdr = test_btf_data;
test_strs = test_btf_data + test_hdr->str_off;
expect_hdr = expect_btf_data;
expect_strs = expect_btf_data + expect_hdr->str_off;
if (CHECK(test_hdr->str_len != expect_hdr->str_len,
"test_hdr->str_len:%u != expect_hdr->str_len:%u",
test_hdr->str_len, expect_hdr->str_len)) {
fprintf(stderr, "\ntest strings:\n");
dump_btf_strings(test_strs, test_str_len);
dump_btf_strings(test_strs, test_hdr->str_len);
fprintf(stderr, "\nexpected strings:\n");
dump_btf_strings(expect_strs, expect_str_len);
dump_btf_strings(expect_strs, expect_hdr->str_len);
err = -1;
goto done;
}
test_str_cur = test_strs;
test_str_end = test_strs + test_str_len;
test_str_end = test_strs + test_hdr->str_len;
expect_str_cur = expect_strs;
expect_str_end = expect_strs + expect_str_len;
expect_str_end = expect_strs + expect_hdr->str_len;
while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
size_t test_len, expect_len;

View File

@ -0,0 +1,376 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Setup/topology:
#
# NS1 NS2 NS3
# veth1 <---> veth2 veth3 <---> veth4 (the top route)
# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
#
# each vethN gets IPv[4|6]_N address
#
# IPv*_SRC = IPv*_1
# IPv*_DST = IPv*_4
#
# all tests test pings from IPv*_SRC to IPv*_DST
#
# by default, routes are configured to allow packets to go
# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
#
# a GRE device is installed in NS3 with IPv*_GRE, and
# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
# (the bottom route)
#
# Tests:
#
# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
# from IP*_SRC to IP*_DST can work is via IPv*_GRE
#
# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
# that encaps the packets with an IP/GRE header to route to IPv*_GRE
#
# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
# ping replies go DST->SRC directly
#
# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
# that encaps the packets with an IP/GRE header to route to IPv*_GRE
#
# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
# ping replies go DST->SRC directly
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
echo "FAIL"
exit 1
fi
readonly NS1="ns1-$(mktemp -u XXXXXX)"
readonly NS2="ns2-$(mktemp -u XXXXXX)"
readonly NS3="ns3-$(mktemp -u XXXXXX)"
readonly IPv4_1="172.16.1.100"
readonly IPv4_2="172.16.2.100"
readonly IPv4_3="172.16.3.100"
readonly IPv4_4="172.16.4.100"
readonly IPv4_5="172.16.5.100"
readonly IPv4_6="172.16.6.100"
readonly IPv4_7="172.16.7.100"
readonly IPv4_8="172.16.8.100"
readonly IPv4_GRE="172.16.16.100"
readonly IPv4_SRC=$IPv4_1
readonly IPv4_DST=$IPv4_4
readonly IPv6_1="fb01::1"
readonly IPv6_2="fb02::1"
readonly IPv6_3="fb03::1"
readonly IPv6_4="fb04::1"
readonly IPv6_5="fb05::1"
readonly IPv6_6="fb06::1"
readonly IPv6_7="fb07::1"
readonly IPv6_8="fb08::1"
readonly IPv6_GRE="fb10::1"
readonly IPv6_SRC=$IPv6_1
readonly IPv6_DST=$IPv6_4
TEST_STATUS=0
TESTS_SUCCEEDED=0
TESTS_FAILED=0
process_test_results()
{
if [[ "${TEST_STATUS}" -eq 0 ]] ; then
echo "PASS"
TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
else
echo "FAIL"
TESTS_FAILED=$((TESTS_FAILED+1))
fi
}
print_test_summary_and_exit()
{
echo "passed tests: ${TESTS_SUCCEEDED}"
echo "failed tests: ${TESTS_FAILED}"
if [ "${TESTS_FAILED}" -eq "0" ] ; then
exit 0
else
exit 1
fi
}
setup()
{
set -e # exit on error
TEST_STATUS=0
# create devices and namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"
ip netns add "${NS3}"
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
ip link set veth1 netns ${NS1}
ip link set veth2 netns ${NS2}
ip link set veth3 netns ${NS2}
ip link set veth4 netns ${NS3}
ip link set veth5 netns ${NS1}
ip link set veth6 netns ${NS2}
ip link set veth7 netns ${NS2}
ip link set veth8 netns ${NS3}
# configure addresses: the top route (1-2-3-4)
ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
# configure addresses: the bottom route (5-6-7-8)
ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
ip -netns ${NS1} link set dev veth1 up
ip -netns ${NS2} link set dev veth2 up
ip -netns ${NS2} link set dev veth3 up
ip -netns ${NS3} link set dev veth4 up
ip -netns ${NS1} link set dev veth5 up
ip -netns ${NS2} link set dev veth6 up
ip -netns ${NS2} link set dev veth7 up
ip -netns ${NS3} link set dev veth8 up
# configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
# the bottom route to specific bottom addresses
# NS1
# top route
ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1
ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default
ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default
# bottom route
ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5
ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
# NS2
# top route
ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2
ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3
ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
# bottom route
ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6
ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7
ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
# NS3
# top route
ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
# bottom route
ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
# configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
ip -netns ${NS3} link set gre_dev up
ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev
ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
# configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
ip -netns ${NS3} link set gre6_dev up
ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
# rp_filter gets confused by what these tests are doing, so disable it
ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
sleep 1 # reduce flakiness
set +e
}
cleanup()
{
ip netns del ${NS1} 2> /dev/null
ip netns del ${NS2} 2> /dev/null
ip netns del ${NS3} 2> /dev/null
}
trap cleanup EXIT
remove_routes_to_gredev()
{
ip -netns ${NS1} route del ${IPv4_GRE} dev veth5
ip -netns ${NS2} route del ${IPv4_GRE} dev veth7
ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5
ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7
}
add_unreachable_routes_to_gredev()
{
ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32
ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32
ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128
ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128
}
test_ping()
{
local readonly PROTO=$1
local readonly EXPECTED=$2
local RET=0
if [ "${PROTO}" == "IPv4" ] ; then
ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
TEST_STATUS=1
fi
if [ "0" != "${RET}" ]; then
RET=1
fi
if [ "${EXPECTED}" != "${RET}" ] ; then
echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
TEST_STATUS=1
fi
}
test_egress()
{
local readonly ENCAP=$1
echo "starting egress ${ENCAP} encap test"
setup
# by default, pings work
test_ping IPv4 0
test_ping IPv6 0
# remove NS2->DST routes, ping fails
ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
else
echo " unknown encap ${ENCAP}"
TEST_STATUS=1
fi
test_ping IPv4 0
test_ping IPv6 0
# a negative test: remove routes to GRE devices: ping fails
remove_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
# another negative test
add_unreachable_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
cleanup
process_test_results
}
test_ingress()
{
local readonly ENCAP=$1
echo "starting ingress ${ENCAP} encap test"
setup
# need to wait a bit for IPv6 to autoconf, otherwise
# ping6 sometimes fails with "unable to bind to address"
# by default, pings work
test_ping IPv4 0
test_ping IPv6 0
# remove NS2->DST routes, pings fail
ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
test_ping IPv4 1
test_ping IPv6 1
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
else
echo "FAIL: unknown encap ${ENCAP}"
fi
test_ping IPv4 0
test_ping IPv6 0
# a negative test: remove routes to GRE devices: ping fails
remove_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
# another negative test
add_unreachable_routes_to_gredev
test_ping IPv4 1
test_ping IPv6 1
cleanup
process_test_results
}
test_egress IPv4
test_egress IPv6
test_ingress IPv4
test_ingress IPv6
print_test_summary_and_exit

View File

@ -20,6 +20,7 @@
#define MAX_INSNS 512
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static bool verbose = false;
struct sock_test {
const char *descr;
@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog,
enum bpf_attach_type attach_type)
{
struct bpf_load_program_attr attr;
int ret;
memset(&attr, 0, sizeof(struct bpf_load_program_attr));
attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog,
attr.insns = prog;
attr.insns_cnt = probe_prog_length(attr.insns);
attr.license = "GPL";
attr.log_level = 2;
return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
if (verbose && ret < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
return ret;
}
static int attach_sock_prog(int cgfd, int progfd,

View File

@ -0,0 +1,327 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
enum bpf_array_idx {
SRV_IDX,
CLI_IDX,
__NR_BPF_ARRAY_IDX,
};
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
printf(format); \
printf("\n"); \
exit(-1); \
} \
})
#define TEST_CGROUP "/test-bpf-sock-fields"
#define DATA "Hello BPF!"
#define DATA_LEN sizeof(DATA)
static struct sockaddr_in6 srv_sa6, cli_sa6;
static int linum_map_fd;
static int addr_map_fd;
static int tp_map_fd;
static int sk_map_fd;
static __u32 srv_idx = SRV_IDX;
static __u32 cli_idx = CLI_IDX;
static void init_loopback6(struct sockaddr_in6 *sa6)
{
memset(sa6, 0, sizeof(*sa6));
sa6->sin6_family = AF_INET6;
sa6->sin6_addr = in6addr_loopback;
}
static void print_sk(const struct bpf_sock *sk)
{
char src_ip4[24], dst_ip4[24];
char src_ip6[64], dst_ip6[64];
inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
"src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
"dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
sk->mark, sk->priority,
sk->src_ip4, src_ip4,
sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
src_ip6, sk->src_port,
sk->dst_ip4, dst_ip4,
sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
dst_ip6, ntohs(sk->dst_port));
}
static void print_tp(const struct bpf_tcp_sock *tp)
{
printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
"snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
"rate_delivered:%u rate_interval_us:%u packets_out:%u "
"retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
"segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
"bytes_received:%llu bytes_acked:%llu\n",
tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
tp->packets_out, tp->retrans_out, tp->total_retrans,
tp->segs_in, tp->data_segs_in, tp->segs_out,
tp->data_segs_out, tp->lost_out, tp->sacked_out,
tp->bytes_received, tp->bytes_acked);
}
static void check_result(void)
{
struct bpf_tcp_sock srv_tp, cli_tp;
struct bpf_sock srv_sk, cli_sk;
__u32 linum, idx0 = 0;
int err;
err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk);
CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)",
"err:%d errno:%d", err, errno);
err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp);
CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)",
"err:%d errno:%d", err, errno);
printf("srv_sk: ");
print_sk(&srv_sk);
printf("\n");
printf("cli_sk: ");
print_sk(&cli_sk);
printf("\n");
printf("srv_tp: ");
print_tp(&srv_tp);
printf("\n");
printf("cli_tp: ");
print_tp(&cli_tp);
printf("\n");
CHECK(srv_sk.state == 10 ||
!srv_sk.state ||
srv_sk.family != AF_INET6 ||
srv_sk.protocol != IPPROTO_TCP ||
memcmp(srv_sk.src_ip6, &in6addr_loopback,
sizeof(srv_sk.src_ip6)) ||
memcmp(srv_sk.dst_ip6, &in6addr_loopback,
sizeof(srv_sk.dst_ip6)) ||
srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
srv_sk.dst_port != cli_sa6.sin6_port,
"Unexpected srv_sk", "Check srv_sk output. linum:%u", linum);
CHECK(cli_sk.state == 10 ||
!cli_sk.state ||
cli_sk.family != AF_INET6 ||
cli_sk.protocol != IPPROTO_TCP ||
memcmp(cli_sk.src_ip6, &in6addr_loopback,
sizeof(cli_sk.src_ip6)) ||
memcmp(cli_sk.dst_ip6, &in6addr_loopback,
sizeof(cli_sk.dst_ip6)) ||
cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
cli_sk.dst_port != srv_sa6.sin6_port,
"Unexpected cli_sk", "Check cli_sk output. linum:%u", linum);
CHECK(srv_tp.data_segs_out != 1 ||
srv_tp.data_segs_in ||
srv_tp.snd_cwnd != 10 ||
srv_tp.total_retrans ||
srv_tp.bytes_acked != DATA_LEN,
"Unexpected srv_tp", "Check srv_tp output. linum:%u", linum);
CHECK(cli_tp.data_segs_out ||
cli_tp.data_segs_in != 1 ||
cli_tp.snd_cwnd != 10 ||
cli_tp.total_retrans ||
cli_tp.bytes_received != DATA_LEN,
"Unexpected cli_tp", "Check cli_tp output. linum:%u", linum);
}
static void test(void)
{
int listen_fd, cli_fd, accept_fd, epfd, err;
struct epoll_event ev;
socklen_t addrlen;
addrlen = sizeof(struct sockaddr_in6);
ev.events = EPOLLIN;
epfd = epoll_create(1);
CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
/* Prepare listen_fd */
listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
listen_fd, errno);
init_loopback6(&srv_sa6);
err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
err = listen(listen_fd, 1);
CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
/* Prepare cli_fd */
cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
init_loopback6(&cli_sa6);
err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
err, errno);
/* Update addr_map with srv_sa6 and cli_sa6 */
err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
/* Connect from cli_sa6 to srv_sa6 */
err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
CHECK(err && errno != EINPROGRESS,
"connect(cli_fd)", "err:%d errno:%d", err, errno);
ev.data.fd = listen_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
err, errno);
/* Accept the connection */
/* Have some timeout in accept(listen_fd). Just in case. */
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != listen_fd,
"epoll_wait(listen_fd)",
"err:%d errno:%d ev.data.fd:%d listen_fd:%d",
err, errno, ev.data.fd, listen_fd);
accept_fd = accept(listen_fd, NULL, NULL);
CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
accept_fd, errno);
close(listen_fd);
/* Send some data from accept_fd to cli_fd */
err = send(accept_fd, DATA, DATA_LEN, 0);
CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
err, errno);
/* Have some timeout in recv(cli_fd). Just in case. */
ev.data.fd = cli_fd;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
err, errno);
err = epoll_wait(epfd, &ev, 1, 1000);
CHECK(err != 1 || ev.data.fd != cli_fd,
"epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
err, errno, ev.data.fd, cli_fd);
err = recv(cli_fd, NULL, 0, MSG_TRUNC);
CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
close(epfd);
close(accept_fd);
close(cli_fd);
check_result();
}
int main(int argc, char **argv)
{
struct bpf_prog_load_attr attr = {
.file = "test_sock_fields_kern.o",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.expected_attach_type = BPF_CGROUP_INET_EGRESS,
};
int cgroup_fd, prog_fd, err;
struct bpf_object *obj;
struct bpf_map *map;
err = setup_cgroup_environment();
CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
err, errno);
atexit(cleanup_cgroup_environment);
/* Create a cgroup, get fd, and join it */
cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
"cgroup_fd:%d errno:%d", cgroup_fd, errno);
err = join_cgroup(TEST_CGROUP);
CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
"err:%d errno%d", err, errno);
close(cgroup_fd);
map = bpf_object__find_map_by_name(obj, "addr_map");
CHECK(!map, "cannot find addr_map", "(null)");
addr_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "sock_result_map");
CHECK(!map, "cannot find sock_result_map", "(null)");
sk_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
tp_map_fd = bpf_map__fd(map);
map = bpf_object__find_map_by_name(obj, "linum_map");
CHECK(!map, "cannot find linum_map", "(null)");
linum_map_fd = bpf_map__fd(map);
test();
bpf_object__close(obj);
cleanup_cgroup_environment();
printf("PASS\n");
return 0;
}

View File

@ -547,7 +547,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.errstr = "cannot write into socket",
.errstr = "cannot write into sock",
.result = REJECT,
},
{
@ -562,7 +562,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.errstr = "invalid bpf_sock access off=0 size=8",
.errstr = "invalid sock access off=0 size=8",
.result = REJECT,
},
{

View File

@ -0,0 +1,384 @@
{
"skb->sk: no NULL check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'sock_common_or_null'",
},
{
"skb->sk: sk->family [non fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"skb->sk: sk->type [fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock_common access",
},
{
"bpf_sk_fullsock(skb->sk): no !skb->sk check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "type=sock_common_or_null expected=sock_common",
},
{
"sk_fullsock(skb->sk): no NULL check on ret",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'sock_or_null'",
},
{
"sk_fullsock(skb->sk): sk->type [fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->family [non fullsock field]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->state [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->type [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): sk->protocol [narrow load]",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"sk_fullsock(skb->sk): beyond last field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid sock access",
},
{
"bpf_tcp_sock(skb->sk): no !skb->sk check",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "type=sock_common_or_null expected=sock_common",
},
{
"bpf_tcp_sock(skb->sk): no NULL check on ret",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid mem access 'tcp_sock_or_null'",
},
{
"bpf_tcp_sock(skb->sk): tp->snd_cwnd",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_tcp_sock(skb->sk): tp->bytes_acked",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_tcp_sock(skb->sk): beyond last field",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = REJECT,
.errstr = "invalid tcp_sock access",
},
{
"bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
.result = ACCEPT,
},
{
"bpf_sk_release(skb->sk)",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "type=sock_common expected=sock",
},
{
"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "reference has not been acquired before",
},
{
"bpf_sk_release(bpf_tcp_sock(skb->sk))",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "type=tcp_sock expected=sock",
},

View File

@ -365,7 +365,7 @@
},
.result = REJECT,
//.errstr = "same insn cannot be used with different pointers",
.errstr = "cannot write into socket",
.errstr = "cannot write into sock",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{