Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Fix bogus compilter warning in nfnetlink_queue, from Florian Westphal.

2) Don't run conntrack on vrf with !dflt qdisc, from Nicolas Dichtel.

3) Fix nft_pipapo bucket load in AVX2 lookup routine for six 8-bit
   groups, from Stefano Brivio.

4) Break rule evaluation on malformed TCP options.

5) Use socat instead of nc in selftests/netfilter/nft_zones_many.sh,
   also from Florian

6) Fix KCSAN data-race in conntrack timeout updates, from Eric Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
  netfilter: conntrack: annotate data-races around ct->timeout
  selftests: netfilter: switch zone stress to socat
  netfilter: nft_exthdr: break evaluation if setting TCP option fails
  selftests: netfilter: Add correctness test for mac,net set type
  nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups
  vrf: don't run conntrack on vrf with !dflt qdisc
  netfilter: nfnetlink_queue: silence bogus compiler warning
====================

Link: https://lore.kernel.org/r/20211209000847.102598-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2021-12-08 17:02:33 -08:00
commit fd31cb0c6a
11 changed files with 82 additions and 32 deletions

View File

@ -770,8 +770,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
@ -792,6 +790,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
return skb;
vrf_nf_set_untracked(skb);
if (qdisc_tx_is_default(vrf_dev) ||
IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
return vrf_ip6_out_direct(vrf_dev, sk, skb);
@ -1000,8 +1000,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
vrf_nf_set_untracked(skb);
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
@ -1023,6 +1021,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
ipv4_is_lbcast(ip_hdr(skb)->daddr))
return skb;
vrf_nf_set_untracked(skb);
if (qdisc_tx_is_default(vrf_dev) ||
IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
return vrf_ip_out_direct(vrf_dev, sk, skb);

View File

@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
/* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
s32 timeout = ct->timeout - nfct_time_stamp;
s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
return timeout > 0 ? timeout : 0;
}
static inline bool nf_ct_is_expired(const struct nf_conn *ct)
{
return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
}
/* use after obtaining a reference count */
@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
static inline void nf_ct_offload_timeout(struct nf_conn *ct)
{
if (nf_ct_expires(ct) < NF_CT_DAY / 2)
ct->timeout = nfct_time_stamp + NF_CT_DAY;
WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
}
struct kernel_param;

View File

@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
tstamp = nf_conn_tstamp_find(ct);
if (tstamp) {
s32 timeout = ct->timeout - nfct_time_stamp;
s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
tstamp->stop = ktime_get_real_ns();
if (timeout < 0)
@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
}
/* We want the clashing entry to go away real soon: 1 second timeout. */
loser_ct->timeout = nfct_time_stamp + HZ;
WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
/* IPS_NAT_CLASH removes the entry automatically on the first
* reply. Also prevents UDP tracker from moving the entry to
@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
ct->timeout = 0;
WRITE_ONCE(ct->timeout, 0);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset, 0,
offsetof(struct nf_conn, proto) -

View File

@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
if (timeout > INT_MAX)
timeout = INT_MAX;
ct->timeout = nfct_time_stamp + (u32)timeout;
WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
if (test_bit(IPS_DYING_BIT, &ct->status))
return -ETIME;

View File

@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (timeout < 0)
timeout = 0;
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout;
if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
static void flow_offload_fixup_ct_state(struct nf_conn *ct)

View File

@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *indev;
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info ctinfo;
enum ip_conntrack_info ctinfo = 0;
struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;

View File

@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
return;
goto err;
opt = (u8 *)tcph;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
continue;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return;
goto err;
if (skb_ensure_writable(pkt->skb,
nft_thoff(pkt) + i + priv->len))
return;
goto err;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
&tcphdr_len);
if (!tcph)
return;
goto err;
offset = i + priv->offset;
@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
return;
}
return;
err:
regs->verdict.code = NFT_BREAK;
}
static void nft_exthdr_sctp_eval(const struct nft_expr *expr,

View File

@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
NFT_PIPAPO_AVX2_AND(5, 0, 1);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize);
NFT_PIPAPO_AVX2_AND(7, 2, 3);
/* Stall */

View File

@ -150,11 +150,27 @@ EOF
# oifname is the vrf device.
test_masquerade_vrf()
{
local qdisc=$1
if [ "$qdisc" != "default" ]; then
tc -net $ns0 qdisc add dev tvrf root $qdisc
fi
ip netns exec $ns0 conntrack -F 2>/dev/null
ip netns exec $ns0 nft -f - <<EOF
flush ruleset
table ip nat {
chain rawout {
type filter hook output priority raw;
oif tvrf ct state untracked counter
}
chain postrouting2 {
type filter hook postrouting priority mangle;
oif tvrf ct state untracked counter
}
chain postrouting {
type nat hook postrouting priority 0;
# NB: masquerade should always be combined with 'oif(name) bla',
@ -171,13 +187,18 @@ EOF
fi
# must also check that nat table was evaluated on second (lower device) iteration.
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
if [ $? -eq 0 ]; then
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
else
echo "FAIL: vrf masq rule has unexpected counter value"
echo "FAIL: vrf rules have unexpected counter value"
ret=1
fi
if [ "$qdisc" != "default" ]; then
tc -net $ns0 qdisc del dev tvrf root
fi
}
# add masq rule that gets evaluated w. outif set to veth device.
@ -213,7 +234,8 @@ EOF
}
test_ct_zone_in
test_masquerade_vrf
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
exit $ret

View File

@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
net_port_mac_proto_net"
net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add"
@ -277,6 +277,23 @@ perf_entries 1000
perf_proto ipv4
"
TYPE_mac_net="
display mac,net
type_spec ether_addr . ipv4_addr
chain_spec ether saddr . ip saddr
dst
src mac addr4
start 1
count 5
src_delta 2000
tools sendip nc bash
proto udp
race_repeat 0
perf_duration 0
"
TYPE_net_mac_icmp="
display net,mac - ICMP
type_spec ipv4_addr . ether_addr
@ -984,7 +1001,8 @@ format() {
fi
done
for f in ${src}; do
__expr="${__expr} . "
[ "${__expr}" != "{ " ] && __expr="${__expr} . "
__start="$(eval format_"${f}" "${srcstart}")"
__end="$(eval format_"${f}" "${srcend}")"

View File

@ -18,11 +18,17 @@ cleanup()
ip netns del $ns
}
ip netns add $ns
if [ $? -ne 0 ];then
echo "SKIP: Could not create net namespace $gw"
exit $ksft_skip
fi
checktool (){
if ! $1 > /dev/null 2>&1; then
echo "SKIP: Could not $2"
exit $ksft_skip
fi
}
checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
checktool "socat -V" "run test without socat tool"
checktool "ip netns add $ns" "create net namespace"
trap cleanup EXIT
@ -71,7 +77,8 @@ EOF
local start=$(date +%s%3N)
i=$((i + 10000))
j=$((j + 1))
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
# nft rule in output places each packet in a different zone.
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break