diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index a382d2c04a42..cf943016930f 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -4,15 +4,13 @@ #include #include -/* - * Based on the MROUTING 3.5 defines primarily to keep - * source compatibility with BSD. +/* Based on the MROUTING 3.5 defines primarily to keep + * source compatibility with BSD. * - * See the mrouted code for the original history. - * - * Protocol Independent Multicast (PIM) data structures included - * Carlos Picoto (cap@di.fc.ul.pt) + * See the mrouted code for the original history. * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) */ #define MRT_BASE 200 @@ -34,15 +32,13 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) -#define MAXVIFS 32 +#define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) -/* - * Same idea as select - */ - +/* Same idea as select */ + #define VIFM_SET(n,m) ((m)|=(1<<(n))) #define VIFM_CLR(n,m) ((m)&=~(1<<(n))) #define VIFM_ISSET(n,m) ((m)&(1<<(n))) @@ -50,11 +46,9 @@ typedef unsigned short vifi_t; #define VIFM_COPY(mfrom,mto) ((mto)=(mfrom)) #define VIFM_SAME(m1,m2) ((m1)==(m2)) -/* - * Passed by mrouted for an MRT_ADD_VIF - again we use the - * mrouted 3.6 structures for compatibility +/* Passed by mrouted for an MRT_ADD_VIF - again we use the + * mrouted 3.6 structures for compatibility */ - struct vifctl { vifi_t vifc_vifi; /* Index of VIF */ unsigned char vifc_flags; /* VIFF_ flags */ @@ -73,10 +67,7 @@ struct vifctl { #define VIFF_USE_IFINDEX 0x8 /* use vifc_lcl_ifindex instead of vifc_lcl_addr to find an interface */ -/* - * Cache manipulation structures for mrouted and PIMd - */ - +/* Cache manipulation structures for mrouted and PIMd */ struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ struct in_addr mfcc_mcastgrp; /* Group in question */ @@ -88,10 +79,7 @@ struct mfcctl { int mfcc_expire; }; -/* - * Group count retrieval for mrouted - */ - +/* Group count retrieval for mrouted */ struct sioc_sg_req { struct in_addr src; struct in_addr grp; @@ -100,10 +88,7 @@ struct sioc_sg_req { unsigned long wrong_if; }; -/* - * To get vif packet counts - */ - +/* To get vif packet counts */ struct sioc_vif_req { vifi_t vifi; /* Which iface */ unsigned long icount; /* In packets */ @@ -112,11 +97,9 @@ struct sioc_vif_req { unsigned long obytes; /* Out bytes */ }; -/* - * This is the format the mroute daemon expects to see IGMP control - * data. Magically happens to be like an IP packet as per the original +/* This is the format the mroute daemon expects to see IGMP control + * data. Magically happens to be like an IP packet as per the original */ - struct igmpmsg { __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ @@ -126,21 +109,13 @@ struct igmpmsg { struct in_addr im_src,im_dst; }; -/* - * That's all usermode folks - */ - - +/* That's all usermode folks */ #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ -/* - * Pseudo messages used by mrouted - */ - +/* Pseudo messages used by mrouted */ #define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */ #define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ #define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ - #endif /* _UAPI__LINUX_MROUTE_H */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..a2d248d9c35c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,10 +67,6 @@ #include #include -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) -#define CONFIG_IP_PIMSM 1 -#endif - struct mr_table { struct list_head list; possible_net_t net; @@ -84,9 +80,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) int mroute_reg_vif_num; -#endif }; struct ipmr_rule { @@ -97,15 +91,18 @@ struct ipmr_result { struct mr_table *mrt; }; +static inline bool pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ +/* Multicast router control variables */ #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -252,8 +249,8 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOMEM; + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); goto err1; } @@ -301,8 +298,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, static int __net_init ipmr_rules_init(struct net *net) { - net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - return net->ipv4.mrt ? 0 : -ENOMEM; + struct mr_table *mrt; + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) + return PTR_ERR(mrt); + net->ipv4.mrt = mrt; + return 0; } static void __net_exit ipmr_rules_exit(struct net *net) @@ -319,13 +321,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) struct mr_table *mrt; unsigned int i; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) + return ERR_PTR(-EINVAL); + mrt = ipmr_get_table(net, id); if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (!mrt) - return NULL; + return ERR_PTR(-ENOMEM); write_pnet(&mrt->net, net); mrt->id = id; @@ -338,9 +344,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)mrt); -#ifdef CONFIG_IP_PIMSM mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif @@ -387,8 +391,24 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } -static -struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) +/* Initialize ipmr pimreg/tunnel in_device */ +static bool ipmr_init_vif_indev(const struct net_device *dev) +{ + struct in_device *in_dev; + + ASSERT_RTNL(); + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return false; + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + + return true; +} + +static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -399,7 +419,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) int err; struct ifreq ifr; struct ip_tunnel_parm p; - struct in_device *in_dev; memset(&p, 0, sizeof(p)); p.iph.daddr = v->vifc_rmt_addr.s_addr; @@ -424,15 +443,8 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) + if (!ipmr_init_vif_indev(dev)) goto failure; - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - if (dev_open(dev)) goto failure; dev_hold(dev); @@ -449,8 +461,7 @@ failure: return NULL; } -#ifdef CONFIG_IP_PIMSM - +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); @@ -500,7 +511,6 @@ static void reg_vif_setup(struct net_device *dev) static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; - struct in_device *in_dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) @@ -520,18 +530,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (!ipmr_init_vif_indev(dev)) goto failure; - } - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - rcu_read_unlock(); - if (dev_open(dev)) goto failure; @@ -547,13 +547,56 @@ failure: unregister_netdevice(dev); return NULL; } + +/* called with rcu_read_lock() */ +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) +{ + struct net_device *reg_dev = NULL; + struct iphdr *encap; + + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); + /* Check that: + * a. packet is really sent to a multicast group + * b. packet is not a NULL-REGISTER + * c. packet is not truncated + */ + if (!ipv4_is_multicast(encap->daddr) || + encap->tot_len == 0 || + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; + + read_lock(&mrt_lock); + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + read_unlock(&mrt_lock); + + if (!reg_dev) + return 1; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = CHECKSUM_NONE; + + skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); + + netif_rx(skb); + + return NET_RX_SUCCESS; +} +#else +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) +{ + return NULL; +} #endif /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call */ - static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { @@ -575,10 +618,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, return -EADDRNOTAVAIL; } -#ifdef CONFIG_IP_PIMSM if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; -#endif if (vifi + 1 == mrt->maxvif) { int tmp; @@ -625,7 +666,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c) /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. */ - static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); @@ -653,9 +693,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) ipmr_cache_free(c); } - /* Timer process for the unresolved queue. */ - static void ipmr_expire_process(unsigned long arg) { struct mr_table *mrt = (struct mr_table *)arg; @@ -695,7 +733,6 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ - static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { @@ -731,10 +768,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRINUSE; switch (vifc->vifc_flags) { -#ifdef CONFIG_IP_PIMSM case VIFF_REGISTER: - /* - * Special Purpose VIF in PIM + if (!pimsm_enabled()) + return -EINVAL; + /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) @@ -749,7 +786,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; -#endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (!dev) @@ -761,7 +797,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; - case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { @@ -815,10 +850,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; -#ifdef CONFIG_IP_PIMSM if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; -#endif if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); @@ -883,9 +916,7 @@ skip: return ipmr_cache_find_any_parent(mrt, vifi); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); @@ -906,10 +937,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) return c; } -/* - * A cache entry has gone into a resolved state from queued - */ - +/* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { @@ -917,7 +945,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -941,34 +968,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* - * Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme. * - * Called under mrt_lock. + * Called under mrt_lock. */ - static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); + struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; - struct sock *mroute_sk; + struct sk_buff *skb; int ret; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else -#endif skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. @@ -986,28 +1008,23 @@ static int ipmr_cache_report(struct mr_table *mrt, ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); - } else -#endif - { - - /* Copy the IP header */ - - skb_set_network_header(skb, skb->len); - skb_put(skb, ihl); - skb_copy_to_linear_data(skb, pkt->data, ihl); - ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ - msg = (struct igmpmsg *)skb_network_header(skb); - msg->im_vif = vifi; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); - - /* Add our header */ - - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - igmp->type = - msg->im_msgtype = assert; - igmp->code = 0; - ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ - skb->transport_header = skb->network_header; + } else { + /* Copy the IP header */ + skb_set_network_header(skb, skb->len); + skb_put(skb, ihl); + skb_copy_to_linear_data(skb, pkt->data, ihl); + /* Flag to the kernel this is a route add */ + ip_hdr(skb)->protocol = 0; + msg = (struct igmpmsg *)skb_network_header(skb); + msg->im_vif = vifi; + skb_dst_set(skb, dst_clone(skb_dst(pkt))); + /* Add our header */ + igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp->type = assert; + msg->im_msgtype = assert; + igmp->code = 0; + ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ + skb->transport_header = skb->network_header; } rcu_read_lock(); @@ -1019,7 +1036,6 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Deliver to mrouted */ - ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); if (ret < 0) { @@ -1030,12 +1046,9 @@ static int ipmr_cache_report(struct mr_table *mrt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, + struct sk_buff *skb) { bool found = false; int err; @@ -1053,7 +1066,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -1063,13 +1075,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* Fill in the new cache entry */ - c->mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ - err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry @@ -1091,7 +1101,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; @@ -1104,9 +1113,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) return err; } -/* - * MFC cache manipulation by user space mroute daemon - */ +/* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { @@ -1177,9 +1184,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); @@ -1204,10 +1210,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } -/* - * Close the multicast socket, and clear the vif tables etc - */ - +/* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt) { int i; @@ -1215,7 +1218,6 @@ static void mroute_clean_tables(struct mr_table *mrt) struct mfc_cache *c, *next; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { if (!(mrt->vif_table[i].flags & VIFF_STATIC)) vif_delete(mrt, i, 0, &list); @@ -1223,7 +1225,6 @@ static void mroute_clean_tables(struct mr_table *mrt) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) @@ -1267,45 +1268,51 @@ static void mrtsock_destruct(struct sock *sk) rtnl_unlock(); } -/* - * Socket options and virtual interface manipulation. The whole - * virtual interface system is a complete heap, but unfortunately - * that's how BSD mrouted happens to think. Maybe one day with a proper - * MOSPF/PIM router set up we can clean this up. +/* Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) { - int ret, parent = 0; + struct net *net = sock_net(sk); + int val, ret = 0, parent = 0; + struct mr_table *mrt; struct vifctl vif; struct mfcctl mfc; - struct net *net = sock_net(sk); - struct mr_table *mrt; + u32 uval; + /* There's one exception to the lock - MRT_DONE which needs to unlock */ + rtnl_lock(); if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; + inet_sk(sk)->inet_num != IPPROTO_IGMP) { + ret = -EOPNOTSUPP; + goto out_unlock; + } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (!mrt) - return -ENOENT; - + if (!mrt) { + ret = -ENOENT; + goto out_unlock; + } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && - !ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EACCES; + !ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EACCES; + goto out_unlock; + } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) - return -EINVAL; - - rtnl_lock(); - if (rtnl_dereference(mrt->mroute_sk)) { - rtnl_unlock(); - return -EADDRINUSE; - } + ret = -EINVAL; + if (rtnl_dereference(mrt->mroute_sk)) + ret = -EADDRINUSE; + if (ret) + break; ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1315,129 +1322,133 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } - rtnl_unlock(); - return ret; + break; case MRT_DONE: - if (sk != rcu_access_pointer(mrt->mroute_sk)) - return -EACCES; - return ip_ra_control(sk, 0, NULL); + if (sk != rcu_access_pointer(mrt->mroute_sk)) { + ret = -EACCES; + } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); + ret = ip_ra_control(sk, 0, NULL); + goto out; + } + break; case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen != sizeof(vif)) - return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) - return -EFAULT; - if (vif.vifc_vifi >= MAXVIFS) - return -ENFILE; - rtnl_lock(); + if (optlen != sizeof(vif)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&vif, optval, sizeof(vif))) { + ret = -EFAULT; + break; + } + if (vif.vifc_vifi >= MAXVIFS) { + ret = -ENFILE; + break; + } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } - rtnl_unlock(); - return ret; - - /* - * Manipulate the forwarding caches. These live - * in a sort of kernel/user symbiosis. - */ + break; + /* Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: - if (optlen != sizeof(mfc)) - return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) - return -EFAULT; + if (optlen != sizeof(mfc)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&mfc, optval, sizeof(mfc))) { + ret = -EFAULT; + break; + } if (parent == 0) parent = mfc.mfcc_parent; - rtnl_lock(); if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); - rtnl_unlock(); - return ret; - /* - * Control PIM assert. - */ + break; + /* Control PIM assert. */ case MRT_ASSERT: - { - int v; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - mrt->mroute_do_assert = v; - return 0; - } -#ifdef CONFIG_IP_PIMSM - case MRT_PIM: - { - int v; - - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - v = !!v; - - rtnl_lock(); - ret = 0; - if (v != mrt->mroute_do_pim) { - mrt->mroute_do_pim = v; - mrt->mroute_do_assert = v; + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; } - rtnl_unlock(); - return ret; - } -#endif -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mrt->mroute_do_assert = val; + break; + case MRT_PIM: + if (!pimsm_enabled()) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + + val = !!val; + if (val != mrt->mroute_do_pim) { + mrt->mroute_do_pim = val; + mrt->mroute_do_assert = val; + } + break; case MRT_TABLE: - { - u32 v; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(uval)) { + ret = -EINVAL; + break; + } + if (get_user(uval, (u32 __user *)optval)) { + ret = -EFAULT; + break; + } - if (optlen != sizeof(u32)) - return -EINVAL; - if (get_user(v, (u32 __user *)optval)) - return -EFAULT; - - /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ - if (v != RT_TABLE_DEFAULT && v >= 1000000000) - return -EINVAL; - - rtnl_lock(); - ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - if (!ipmr_new_table(net, v)) - ret = -ENOMEM; + mrt = ipmr_new_table(net, uval); + if (IS_ERR(mrt)) + ret = PTR_ERR(mrt); else - raw_sk(sk)->ipmr_table = v; + raw_sk(sk)->ipmr_table = uval; } - rtnl_unlock(); - return ret; - } -#endif - /* - * Spurious command, or MRT_VERSION which you cannot - * set. - */ + break; + /* Spurious command, or MRT_VERSION which you cannot set. */ default: - return -ENOPROTOOPT; + ret = -ENOPROTOOPT; } +out_unlock: + rtnl_unlock(); +out: + return ret; } -/* - * Getsock opt support for the multicast routing system. - */ - +/* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; @@ -1453,39 +1464,35 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (!mrt) return -ENOENT; - if (optname != MRT_VERSION && -#ifdef CONFIG_IP_PIMSM - optname != MRT_PIM && -#endif - optname != MRT_ASSERT) + switch (optname) { + case MRT_VERSION: + val = 0x0305; + break; + case MRT_PIM: + if (!pimsm_enabled()) + return -ENOPROTOOPT; + val = mrt->mroute_do_pim; + break; + case MRT_ASSERT: + val = mrt->mroute_do_assert; + break; + default: return -ENOPROTOOPT; + } if (get_user(olr, optlen)) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) - val = 0x0305; -#ifdef CONFIG_IP_PIMSM - else if (optname == MRT_PIM) - val = mrt->mroute_do_pim; -#endif - else - val = mrt->mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; } -/* - * The IP multicast ioctl support routines. - */ - +/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req sr; @@ -1618,7 +1625,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif - static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -1640,17 +1646,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v return NOTIFY_DONE; } - static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; -/* - * Encapsulate a packet by attaching a valid IPIP header to it. - * This avoids tunnel drivers and other mess and gives us the speed so - * important for multicast video. +/* Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. */ - static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { @@ -1692,9 +1695,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -/* - * Processing handlers for ipmr_forward - */ +/* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, int vifi) @@ -1709,7 +1710,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (!vif->dev) goto out_free; -#ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; @@ -1718,7 +1718,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } -#endif if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, @@ -1745,7 +1744,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * allow to send ICMP, so that packets will disappear * to blackhole. */ - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; @@ -1777,8 +1775,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPCB(skb)->flags |= IPSKB_FORWARDED; - /* - * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface @@ -1809,7 +1806,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ - static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) @@ -1834,9 +1830,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto forward; } - /* - * Wrong interface: drop packet and (maybe) send PIM assert. - */ + /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. @@ -1875,9 +1869,7 @@ forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; - /* - * Forward the frame - */ + /* Forward the frame */ if (cache->mfc_origin == htonl(INADDR_ANY) && cache->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && @@ -1951,11 +1943,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) return mrt; } -/* - * Multicast packets for forwarding arrive here - * Called with rcu_read_lock(); +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); */ - int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; @@ -2006,9 +1996,7 @@ int ip_mr_input(struct sk_buff *skb) vif); } - /* - * No usable cache entry - */ + /* No usable cache entry */ if (!cache) { int vif; @@ -2049,53 +2037,8 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM -/* called with rcu_read_lock() */ -static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, - unsigned int pimlen) -{ - struct net_device *reg_dev = NULL; - struct iphdr *encap; - - encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: - * a. packet is really sent to a multicast group - * b. packet is not a NULL-REGISTER - * c. packet is not truncated - */ - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + pimlen > skb->len) - return 1; - - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - - if (!reg_dev) - return 1; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8 *)encap - skb->data); - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = CHECKSUM_NONE; - - skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); - - netif_rx(skb); - - return NET_RX_SUCCESS; -} -#endif - #ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - +/* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; @@ -2420,9 +2363,8 @@ done: } #ifdef CONFIG_PROC_FS -/* - * The /proc interfaces to multicast routing : - * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif +/* The /proc interfaces to multicast routing : + * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ struct ipmr_vif_iter { struct seq_net_private p; @@ -2706,10 +2648,7 @@ static const struct net_protocol pim_protocol = { }; #endif - -/* - * Setup for IP multicast routing - */ +/* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; @@ -2759,8 +2698,6 @@ int __init ip_mr_init(void) sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - if (!mrt_cachep) - return -ENOMEM; err = register_pernet_subsys(&ipmr_net_ops); if (err)