2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-25 13:43:55 +08:00

Merge branch 'udp-fix-early-demux-for-mcast-packets'

Paolo Abeni says:

====================
udp: fix early demux for mcast packets

Currently the early demux callbacks do not perform source address validation.
This is not an issue for TCP or UDP unicast, where the early demux
is only allowed for connected sockets and the source address is validated
for the first packet and never changes.

The UDP protocol currently also allows early demux for unconnected multicast
sockets, and we currently do not perform any validation for them once the
first packet has landed on the socket: besides the rp_filter - if enabled -
being ignored, any kind of martian source is also allowed.

This series addresses the issue by allowing the early demux callback to return
an error code, and by performing the proper checks for unconnected UDP
multicast sockets before leveraging the rx dst cache.

Alternatively we could disable early demux for unconnected mcast sockets,
but that would cause a significant performance regression - around 50% - while
with this series, with full rp_filter in place, we keep the regression to a
more moderate level.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-10-01 03:55:47 +01:00
commit 230583c195
8 changed files with 71 additions and 45 deletions

View File

@ -39,8 +39,8 @@
/* This is used to register protocols. */ /* This is used to register protocols. */
struct net_protocol { struct net_protocol {
void (*early_demux)(struct sk_buff *skb); int (*early_demux)(struct sk_buff *skb);
void (*early_demux_handler)(struct sk_buff *skb); int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info); void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1, unsigned int no_policy:1,

View File

@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
fl4->fl4_gre_key = gre_key; fl4->fl4_gre_key = gre_key;
return ip_route_output_key(net, fl4); return ip_route_output_key(net, fl4);
} }
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
struct in_device *in_dev, u32 *itag);
int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin); u8 tos, struct net_device *devin);
int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,

View File

@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32);
void tcp_shutdown(struct sock *sk, int how); void tcp_shutdown(struct sock *sk, int how);
void tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb);
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);

View File

@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
} }
void udp_v4_early_demux(struct sk_buff *skb); int udp_v4_early_demux(struct sk_buff *skb);
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
int udp_get_port(struct sock *sk, unsigned short snum, int udp_get_port(struct sock *sk, unsigned short snum,
int (*saddr_cmp)(const struct sock *, int (*saddr_cmp)(const struct sock *,

View File

@ -311,9 +311,10 @@ drop:
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{ {
const struct iphdr *iph = ip_hdr(skb); const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt; int (*edemux)(struct sk_buff *skb);
struct net_device *dev = skb->dev; struct net_device *dev = skb->dev;
void (*edemux)(struct sk_buff *skb); struct rtable *rt;
int err;
/* if ingress device is enslaved to an L3 master device pass the /* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing * skb to its handler for processing
@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
ipprot = rcu_dereference(inet_protos[protocol]); ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
edemux(skb); err = edemux(skb);
if (unlikely(err))
goto drop_error;
/* must reload iph, skb->head might have changed */ /* must reload iph, skb->head might have changed */
iph = ip_hdr(skb); iph = ip_hdr(skb);
} }
@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
* how the packet travels inside Linux networking. * how the packet travels inside Linux networking.
*/ */
if (!skb_valid_dst(skb)) { if (!skb_valid_dst(skb)) {
int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, dev); iph->tos, dev);
if (unlikely(err)) { if (unlikely(err))
if (err == -EXDEV) goto drop_error;
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
goto drop;
}
} }
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
drop: drop:
kfree_skb(skb); kfree_skb(skb);
return NET_RX_DROP; return NET_RX_DROP;
drop_error:
if (err == -EXDEV)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
goto drop;
} }
/* /*

View File

@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
EXPORT_SYMBOL(rt_dst_alloc); EXPORT_SYMBOL(rt_dst_alloc);
/* called in rcu_read_lock() section */ /* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, int our) u8 tos, struct net_device *dev,
struct in_device *in_dev, u32 *itag)
{ {
struct rtable *rth;
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
u32 itag = 0;
int err; int err;
/* Primary sanity checks. */ /* Primary sanity checks. */
if (!in_dev) if (!in_dev)
return -EINVAL; return -EINVAL;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
skb->protocol != htons(ETH_P_IP)) skb->protocol != htons(ETH_P_IP))
goto e_inval; return -EINVAL;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
goto e_inval; return -EINVAL;
if (ipv4_is_zeronet(saddr)) { if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr)) if (!ipv4_is_local_multicast(daddr))
goto e_inval; return -EINVAL;
} else { } else {
err = fib_validate_source(skb, saddr, 0, tos, 0, dev, err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
in_dev, &itag); in_dev, itag);
if (err < 0) if (err < 0)
goto e_err; return err;
} }
return 0;
}
/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
struct rtable *rth;
u32 itag = 0;
int err;
err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
if (err)
return err;
if (our) if (our)
flags |= RTCF_LOCAL; flags |= RTCF_LOCAL;
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth) if (!rth)
goto e_nobufs; return -ENOBUFS;
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag; rth->dst.tclassid = itag;
@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_set(skb, &rth->dst); skb_dst_set(skb, &rth->dst);
return 0; return 0;
e_nobufs:
return -ENOBUFS;
e_inval:
return -EINVAL;
e_err:
return err;
} }

View File

@ -1503,23 +1503,23 @@ csum_err:
} }
EXPORT_SYMBOL(tcp_v4_do_rcv); EXPORT_SYMBOL(tcp_v4_do_rcv);
void tcp_v4_early_demux(struct sk_buff *skb) int tcp_v4_early_demux(struct sk_buff *skb)
{ {
const struct iphdr *iph; const struct iphdr *iph;
const struct tcphdr *th; const struct tcphdr *th;
struct sock *sk; struct sock *sk;
if (skb->pkt_type != PACKET_HOST) if (skb->pkt_type != PACKET_HOST)
return; return 0;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
return; return 0;
iph = ip_hdr(skb); iph = ip_hdr(skb);
th = tcp_hdr(skb); th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4) if (th->doff < sizeof(struct tcphdr) / 4)
return; return 0;
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source, iph->saddr, th->source,
@ -1538,6 +1538,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb_dst_set_noref(skb, dst); skb_dst_set_noref(skb, dst);
} }
} }
return 0;
} }
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)

View File

@ -2221,9 +2221,10 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
return NULL; return NULL;
} }
void udp_v4_early_demux(struct sk_buff *skb) int udp_v4_early_demux(struct sk_buff *skb)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
struct in_device *in_dev = NULL;
const struct iphdr *iph; const struct iphdr *iph;
const struct udphdr *uh; const struct udphdr *uh;
struct sock *sk = NULL; struct sock *sk = NULL;
@ -2234,24 +2235,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
/* validate the packet */ /* validate the packet */
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
return; return 0;
iph = ip_hdr(skb); iph = ip_hdr(skb);
uh = udp_hdr(skb); uh = udp_hdr(skb);
if (skb->pkt_type == PACKET_BROADCAST || if (skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) { skb->pkt_type == PACKET_MULTICAST) {
struct in_device *in_dev = __in_dev_get_rcu(skb->dev); in_dev = __in_dev_get_rcu(skb->dev);
if (!in_dev) if (!in_dev)
return; return 0;
/* we are supposed to accept bcast packets */ /* we are supposed to accept bcast packets */
if (skb->pkt_type == PACKET_MULTICAST) { if (skb->pkt_type == PACKET_MULTICAST) {
ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
iph->protocol); iph->protocol);
if (!ours) if (!ours)
return; return 0;
} }
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
@ -2263,7 +2264,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
} }
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return; return 0;
skb->sk = sk; skb->sk = sk;
skb->destructor = sock_efree; skb->destructor = sock_efree;
@ -2272,12 +2273,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
if (dst) if (dst)
dst = dst_check(dst, 0); dst = dst_check(dst, 0);
if (dst) { if (dst) {
u32 itag = 0;
/* set noref for now. /* set noref for now.
* any place which wants to hold dst has to call * any place which wants to hold dst has to call
* dst_hold_safe() * dst_hold_safe()
*/ */
skb_dst_set_noref(skb, dst); skb_dst_set_noref(skb, dst);
/* for unconnected multicast sockets we need to validate
* the source on each packet
*/
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
iph->saddr, iph->tos,
skb->dev, in_dev, &itag);
} }
return 0;
} }
int udp_rcv(struct sk_buff *skb) int udp_rcv(struct sk_buff *skb)