2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-27 22:53:55 +08:00
linux-next/net/ipv6/ndisc.c
Praveen Chaudhary 6b2e04bc24 net: allow user to set metric on default route learned via Router Advertisement
For IPv4, default route is learned via DHCPv4 and user is allowed to change
metric using config etc/network/interfaces. But for IPv6, default route can
be learned via RA, for which, currently a fixed metric value 1024 is used.

Ideally, user should be able to configure metric on default route for IPv6
similar to IPv4. This patch adds sysctl for the same.

Logs:

For IPv4:

Config in etc/network/interfaces:
auto eth0
iface eth0 inet dhcp
    metric 4261413864

IPv4 Kernel Route Table:
$ ip route list
default via 172.21.47.1 dev eth0 metric 4261413864

FRR Table, if a static route is configured:
[In real scenario, it is useful to prefer BGP learned default route over DHCPv4 default route.]
Codes: K - kernel route, C - connected, S - static, R - RIP,
       O - OSPF, I - IS-IS, B - BGP, P - PIM, E - EIGRP, N - NHRP,
       T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
       > - selected route, * - FIB route

S>* 0.0.0.0/0 [20/0] is directly connected, eth0, 00:00:03
K   0.0.0.0/0 [254/1000] via 172.21.47.1, eth0, 6d08h51m

i.e. User can prefer Default Router learned via Routing Protocol in IPv4.
Similar behavior is not possible for IPv6, without this fix.

After fix [for IPv6]:
sudo sysctl -w net.ipv6.conf.eth0.net.ipv6.conf.eth0.ra_defrtr_metric=1996489705

IP monitor: [When IPv6 RA is received]
default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705  pref high

Kernel IPv6 routing table
$ ip -6 route list
default via fe80::be16:65ff:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 21sec hoplimit 64 pref high

FRR Table, if a static route is configured:
[In real scenario, it is useful to prefer BGP learned default route over IPv6 RA default route.]
Codes: K - kernel route, C - connected, S - static, R - RIPng,
       O - OSPFv3, I - IS-IS, B - BGP, N - NHRP, T - Table,
       v - VNC, V - VNC-Direct, A - Babel, D - SHARP,
       > - selected route, * - FIB route

S>* ::/0 [20/0] is directly connected, eth0, 00:00:06
K   ::/0 [119/1001] via fe80::xx16:xxxx:feb3:ce8e, eth0, 6d07h43m

If the metric is changed later, the effect will be seen only when next IPv6
RA is received, because the default route must be fully controlled by RA msg.
Below metric is changed from 1996489705 to 1996489704.

$ sudo sysctl -w net.ipv6.conf.eth0.ra_defrtr_metric=1996489704
net.ipv6.conf.eth0.ra_defrtr_metric = 1996489704

IP monitor:
[On next IPv6 RA msg, Kernel deletes prev route and installs new route with updated metric]

Deleted default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 3sec hoplimit 64 pref high
default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489704 pref high

Signed-off-by: Praveen Chaudhary <pchaudhary@linkedin.com>
Signed-off-by: Zhenggen Xu <zxu@linkedin.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20210125214430.24079-1-pchaudhary@linkedin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-26 18:39:45 -08:00

1969 lines
49 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Neighbour Discovery for IPv6
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
* Mike Shaver <shaver@ingenia.com>
*/
/*
* Changes:
*
* Alexey I. Froloff : RFC6106 (DNSSL) support
* Pierre Ynard : export userland ND options
* through netlink (RDNSS support)
* Lars Fenneberg : fixed MTU setting on receipt
* of an RA.
* Janos Farkas : kmalloc failure checks
* Alexey Kuznetsov : state machine reworked
* and moved to net/core.
* Pekka Savola : RFC2461 validation
* YOSHIFUJI Hideaki @USAGI : Verify ND options properly
*/
#define pr_fmt(fmt) "ICMPv6: " fmt
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/sched.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/jhash.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
static bool ndisc_allow_add(const struct net_device *dev,
struct netlink_ext_ack *extack);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
static int ndisc_is_multicast(const void *pkey);
static const struct neigh_ops ndisc_generic_ops = {
.family = AF_INET6,
.solicit = ndisc_solicit,
.error_report = ndisc_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
};
static const struct neigh_ops ndisc_hh_ops = {
.family = AF_INET6,
.solicit = ndisc_solicit,
.error_report = ndisc_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_resolve_output,
};
static const struct neigh_ops ndisc_direct_ops = {
.family = AF_INET6,
.output = neigh_direct_output,
.connected_output = neigh_direct_output,
};
struct neigh_table nd_tbl = {
.family = AF_INET6,
.key_len = sizeof(struct in6_addr),
.protocol = cpu_to_be16(ETH_P_IPV6),
.hash = ndisc_hash,
.key_eq = ndisc_key_eq,
.constructor = ndisc_constructor,
.pconstructor = pndisc_constructor,
.pdestructor = pndisc_destructor,
.proxy_redo = pndisc_redo,
.is_multicast = ndisc_is_multicast,
.allow_add = ndisc_allow_add,
.id = "ndisc_cache",
.parms = {
.tbl = &nd_tbl,
.reachable_time = ND_REACHABLE_TIME,
.data = {
[NEIGH_VAR_MCAST_PROBES] = 3,
[NEIGH_VAR_UCAST_PROBES] = 3,
[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
},
},
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
EXPORT_SYMBOL_GPL(nd_tbl);
void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
int data_len, int pad)
{
int space = __ndisc_opt_addr_space(data_len, pad);
u8 *opt = skb_put(skb, space);
opt[0] = type;
opt[1] = space>>3;
memset(opt + 2, 0, pad);
opt += pad;
space -= pad;
memcpy(opt+2, data, data_len);
data_len += 2;
opt += data_len;
space -= data_len;
if (space > 0)
memset(opt, 0, space);
}
EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
void *data, u8 icmp6_type)
{
__ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
ndisc_addr_option_pad(skb->dev->type));
ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
}
static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
void *ha,
const u8 *ops_data)
{
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
int type;
if (!cur || !end || cur >= end)
return NULL;
type = cur->nd_opt_type;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
} while (cur < end && cur->nd_opt_type != type);
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
opt->nd_opt_type == ND_OPT_PREF64 ||
ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
if (!cur || !end || cur >= end)
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
} while (cur < end && !ndisc_is_useropt(dev, cur));
return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
}
struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
u8 *opt, int opt_len,
struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
if (!nd_opt || opt_len < 0 || !ndopts)
return NULL;
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
int l;
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
l = nd_opt->nd_opt_len << 3;
if (opt_len < l || l == 0)
return NULL;
if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
goto next_opt;
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
case ND_OPT_MTU:
case ND_OPT_NONCE:
case ND_OPT_REDIRECT_HDR:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
ND_PRINTK(2, warn,
"%s: duplicated ND6 option found: type=%d\n",
__func__, nd_opt->nd_opt_type);
} else {
ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
}
break;
case ND_OPT_PREFIX_INFO:
ndopts->nd_opts_pi_end = nd_opt;
if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
break;
#ifdef CONFIG_IPV6_ROUTE_INFO
case ND_OPT_ROUTE_INFO:
ndopts->nd_opts_ri_end = nd_opt;
if (!ndopts->nd_opts_ri)
ndopts->nd_opts_ri = nd_opt;
break;
#endif
default:
if (ndisc_is_useropt(dev, nd_opt)) {
ndopts->nd_useropts_end = nd_opt;
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
} else {
/*
* Unknown options must be silently ignored,
* to accommodate future extension to the
* protocol.
*/
ND_PRINTK(2, notice,
"%s: ignored unsupported option; type=%d, len=%d\n",
__func__,
nd_opt->nd_opt_type,
nd_opt->nd_opt_len);
}
}
next_opt:
opt_len -= l;
nd_opt = ((void *)nd_opt) + l;
}
return ndopts;
}
int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
{
switch (dev->type) {
case ARPHRD_ETHER:
case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */
case ARPHRD_FDDI:
ipv6_eth_mc_map(addr, buf);
return 0;
case ARPHRD_ARCNET:
ipv6_arcnet_mc_map(addr, buf);
return 0;
case ARPHRD_INFINIBAND:
ipv6_ib_mc_map(addr, dev->broadcast, buf);
return 0;
case ARPHRD_IPGRE:
return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
default:
if (dir) {
memcpy(buf, dev->broadcast, dev->addr_len);
return 0;
}
}
return -EINVAL;
}
EXPORT_SYMBOL(ndisc_mc_map);
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd)
{
return ndisc_hashfn(pkey, dev, hash_rnd);
}
static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
{
return neigh_key_eq128(n, pkey);
}
static int ndisc_constructor(struct neighbour *neigh)
{
struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
struct net_device *dev = neigh->dev;
struct inet6_dev *in6_dev;
struct neigh_parms *parms;
bool is_multicast = ipv6_addr_is_multicast(addr);
in6_dev = in6_dev_get(dev);
if (!in6_dev) {
return -EINVAL;
}
parms = in6_dev->nd_parms;
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
if (!dev->header_ops) {
neigh->nud_state = NUD_NOARP;
neigh->ops = &ndisc_direct_ops;
neigh->output = neigh_direct_output;
} else {
if (is_multicast) {
neigh->nud_state = NUD_NOARP;
ndisc_mc_map(addr, neigh->ha, dev, 1);
} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
if (dev->flags&IFF_LOOPBACK)
neigh->type = RTN_LOCAL;
} else if (dev->flags&IFF_POINTOPOINT) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
if (dev->header_ops->cache)
neigh->ops = &ndisc_hh_ops;
else
neigh->ops = &ndisc_generic_ops;
if (neigh->nud_state&NUD_VALID)
neigh->output = neigh->ops->connected_output;
else
neigh->output = neigh->ops->output;
}
in6_dev_put(in6_dev);
return 0;
}
static int pndisc_constructor(struct pneigh_entry *n)
{
struct in6_addr *addr = (struct in6_addr *)&n->key;
struct in6_addr maddr;
struct net_device *dev = n->dev;
if (!dev || !__in6_dev_get(dev))
return -EINVAL;
addrconf_addr_solict_mult(addr, &maddr);
ipv6_dev_mc_inc(dev, &maddr);
return 0;
}
static void pndisc_destructor(struct pneigh_entry *n)
{
struct in6_addr *addr = (struct in6_addr *)&n->key;
struct in6_addr maddr;
struct net_device *dev = n->dev;
if (!dev || !__in6_dev_get(dev))
return;
addrconf_addr_solict_mult(addr, &maddr);
ipv6_dev_mc_dec(dev, &maddr);
}
/* called with rtnl held */
static bool ndisc_allow_add(const struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct inet6_dev *idev = __in6_dev_get(dev);
if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device");
return false;
}
return true;
}
static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int len)
{
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
if (!skb) {
ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
__func__);
return NULL;
}
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
skb_reset_transport_header(skb);
/* Manually assign socket ownership as we avoid calling
* sock_alloc_send_pskb() to bypass wmem buffer limits
*/
skb_set_owner_w(skb, sk);
return skb;
}
static void ip6_nd_hdr(struct sk_buff *skb,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
int hop_limit, int len)
{
struct ipv6hdr *hdr;
struct inet6_dev *idev;
unsigned tclass;
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
tclass = idev ? idev->cnf.ndisc_tclass : 0;
rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
ip6_flow_hdr(hdr, tclass, 0);
hdr->payload_len = htons(len);
hdr->nexthdr = IPPROTO_ICMPV6;
hdr->hop_limit = hop_limit;
hdr->saddr = *saddr;
hdr->daddr = *daddr;
}
static void ndisc_send_skb(struct sk_buff *skb,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(skb->dev);
struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
int err;
struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
if (!dst) {
struct flowi6 fl6;
int oif = skb->dev->ifindex;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
return;
}
skb_dst_set(skb, dst);
}
icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len,
IPPROTO_ICMPV6,
csum_partial(icmp6h,
skb->len, 0));
ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, dst->dev,
dst_output);
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
rcu_read_unlock();
}
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
bool router, bool solicited, bool override, bool inc_opt)
{
struct sk_buff *skb;
struct in6_addr tmpaddr;
struct inet6_ifaddr *ifp;
const struct in6_addr *src_addr;
struct nd_msg *msg;
int optlen = 0;
/* for anycast or proxy, solicited_addr != src_addr */
ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
if (ifp) {
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
override = false;
inc_opt |= ifp->idev->cnf.force_tllao;
in6_ifa_put(ifp);
} else {
if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
&tmpaddr))
return;
src_addr = &tmpaddr;
}
if (!dev->addr_len)
inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
return;
msg = skb_put(skb, sizeof(*msg));
*msg = (struct nd_msg) {
.icmph = {
.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
.icmp6_router = router,
.icmp6_solicited = solicited,
.icmp6_override = override,
},
.target = *solicited_addr,
};
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
dev->dev_addr,
NDISC_NEIGHBOUR_ADVERTISEMENT);
ndisc_send_skb(skb, daddr, src_addr);
}
static void ndisc_send_unsol_na(struct net_device *dev)
{
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
idev = in6_dev_get(dev);
if (!idev)
return;
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
/* skip tentative addresses until dad completes */
if (ifa->flags & IFA_F_TENTATIVE &&
!(ifa->flags & IFA_F_OPTIMISTIC))
continue;
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
/*inc_opt=*/ true);
}
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
}
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr,
u64 nonce)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
int inc_opt = dev->addr_len;
int optlen = 0;
struct nd_msg *msg;
if (!saddr) {
if (ipv6_get_lladdr(dev, &addr_buf,
(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
return;
saddr = &addr_buf;
}
if (ipv6_addr_any(saddr))
inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_SOLICITATION);
if (nonce != 0)
optlen += 8;
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
return;
msg = skb_put(skb, sizeof(*msg));
*msg = (struct nd_msg) {
.icmph = {
.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
},
.target = *solicit,
};
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr,
NDISC_NEIGHBOUR_SOLICITATION);
if (nonce != 0) {
u8 *opt = skb_put(skb, 8);
opt[0] = ND_OPT_NONCE;
opt[1] = 8 >> 3;
memcpy(opt + 2, &nonce, 6);
}
ndisc_send_skb(skb, daddr, saddr);
}
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
const struct in6_addr *daddr)
{
struct sk_buff *skb;
struct rs_msg *msg;
int send_sllao = dev->addr_len;
int optlen = 0;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
* According to section 2.2 of RFC 4429, we must not
* send router solicitations with a sllao from
* optimistic addresses, but we may send the solicitation
* if we don't include the sllao. So here we check
* if our address is optimistic, and if so, we
* suppress the inclusion of the sllao.
*/
if (send_sllao) {
struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
dev, 1);
if (ifp) {
if (ifp->flags & IFA_F_OPTIMISTIC) {
send_sllao = 0;
}
in6_ifa_put(ifp);
} else {
send_sllao = 0;
}
}
#endif
if (send_sllao)
optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
return;
msg = skb_put(skb, sizeof(*msg));
*msg = (struct rs_msg) {
.icmph = {
.icmp6_type = NDISC_ROUTER_SOLICITATION,
},
};
if (send_sllao)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr,
NDISC_ROUTER_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
/*
* "The sender MUST return an ICMP
* destination unreachable"
*/
dst_link_failure(skb);
kfree_skb(skb);
}
/* Called with locked neigh: either read or both */
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
struct in6_addr *saddr = NULL;
struct in6_addr mcaddr;
struct net_device *dev = neigh->dev;
struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
if (!(neigh->nud_state & NUD_VALID)) {
ND_PRINTK(1, dbg,
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
ndisc_send_ns(dev, target, target, saddr, 0);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(dev, target, &mcaddr, saddr, 0);
}
}
static int pndisc_is_router(const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n;
int ret = -1;
read_lock_bh(&nd_tbl.lock);
n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
if (n)
ret = !!(n->flags & NTF_ROUTER);
read_unlock_bh(&nd_tbl.lock);
return ret;
}
void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
struct ndisc_options *ndopts)
{
neigh_update(neigh, lladdr, new, flags, 0);
/* report ndisc ops about neighbour update */
ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_ifaddr *ifp;
struct inet6_dev *idev = NULL;
struct neighbour *neigh;
int dad = ipv6_addr_any(saddr);
bool inc;
int is_router = -1;
u64 nonce = 0;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NS: packet too short\n");
return;
}
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK(2, warn, "NS: multicast target address\n");
return;
}
/*
* RFC2461 7.1.1:
* DAD has to be destined for solicited node multicast address.
*/
if (dad && !ipv6_addr_is_solict_mult(daddr)) {
ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
return;
}
if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
if (!lladdr) {
ND_PRINTK(2, warn,
"NS: invalid link-layer address length\n");
return;
}
/* RFC2461 7.1.1:
* If the IP source address is the unspecified address,
* there MUST NOT be source link-layer address option
* in the message.
*/
if (dad) {
ND_PRINTK(2, warn,
"NS: bad DAD packet (link-layer address option)\n");
return;
}
}
if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
inc = ipv6_addr_is_multicast(daddr);
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
have_ifp:
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
if (nonce != 0 && ifp->dad_nonce == nonce) {
u8 *np = (u8 *)&nonce;
/* Matching nonce if looped back */
ND_PRINTK(2, notice,
"%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
ifp->idev->dev->name,
&ifp->addr, np);
goto out;
}
/*
* We are colliding with another node
* who is doing DAD
* so fail our DAD process
*/
addrconf_dad_failure(skb, ifp);
return;
} else {
/*
* This is not a dad solicitation.
* If we are an optimistic node,
* we should respond.
* Otherwise, we should ignore it.
*/
if (!(ifp->flags & IFA_F_OPTIMISTIC))
goto out;
}
}
idev = ifp->idev;
} else {
struct net *net = dev_net(dev);
/* perhaps an address on the master device */
if (netif_is_l3_slave(dev)) {
struct net_device *mdev;
mdev = netdev_master_upper_dev_get_rcu(dev);
if (mdev) {
ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
if (ifp)
goto have_ifp;
}
}
idev = in6_dev_get(dev);
if (!idev) {
/* XXX: count this drop? */
return;
}
if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
(idev->cnf.forwarding &&
(net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
inc &&
NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {
/*
* for anycast or proxy,
* sender should delay its response
* by a random time between 0 and
* MAX_ANYCAST_DELAY_TIME seconds.
* (RFC2461) -- yoshfuji
*/
struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
if (n)
pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
goto out;
}
} else
goto out;
}
if (is_router < 0)
is_router = idev->cnf.forwarding;
if (dad) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
!!is_router, false, (ifp != NULL), true);
goto out;
}
if (inc)
NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
else
NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
/*
* update / create cache entry
* for the source address
*/
neigh = __neigh_lookup(&nd_tbl, saddr, dev,
!inc || lladdr || !dev->addr_len);
if (neigh)
ndisc_update(dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE,
NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
}
out:
if (ifp)
in6_ifa_put(ifp);
else
in6_dev_put(idev);
}
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NA: packet too short\n");
return;
}
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK(2, warn, "NA: target address is multicast\n");
return;
}
if (ipv6_addr_is_multicast(daddr) &&
msg->icmph.icmp6_solicited) {
ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
return;
}
/* For some 802.11 wireless deployments (and possibly other networks),
* there will be a NA proxy and unsolicitd packets are attacks
* and thus should not be accepted.
*/
if (!msg->icmph.icmp6_solicited && idev &&
idev->cnf.drop_unsolicited_na)
return;
if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
if (!lladdr) {
ND_PRINTK(2, warn,
"NA: invalid link-layer address length\n");
return;
}
}
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
if (skb->pkt_type != PACKET_LOOPBACK
&& (ifp->flags & IFA_F_TENTATIVE)) {
addrconf_dad_failure(skb, ifp);
return;
}
/* What should we make now? The advertisement
is invalid, but ndisc specs say nothing
about it. It could be misconfiguration, or
an smart proxy agent tries to help us :-)
We should not print the error if NA has been
received from loopback - it is just our own
unsolicited advertisement.
*/
if (skb->pkt_type != PACKET_LOOPBACK)
ND_PRINTK(1, warn,
"NA: %pM advertised our address %pI6c on %s!\n",
eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
in6_ifa_put(ifp);
return;
}
neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
if (neigh) {
u8 old_flags = neigh->flags;
struct net *net = dev_net(dev);
if (neigh->nud_state & NUD_FAILED)
goto out;
/*
* Don't update the neighbor cache entry on a proxy NA from
* ourselves because either the proxied node is off link or it
* has already sent a NA to us.
*/
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
/* XXX: idev->cnf.proxy_ndp */
goto out;
}
ndisc_update(dev, neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
(msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
/*
* Change: router to host
*/
rt6_clean_tohost(dev_net(dev), saddr);
}
out:
neigh_release(neigh);
}
}
static void ndisc_recv_rs(struct sk_buff *skb)
{
struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
struct neighbour *neigh;
struct inet6_dev *idev;
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
struct ndisc_options ndopts;
u8 *lladdr = NULL;
if (skb->len < sizeof(*rs_msg))
return;
idev = __in6_dev_get(skb->dev);
if (!idev) {
ND_PRINTK(1, err, "RS: can't find in6 device\n");
return;
}
/* Don't accept RS if we're not in router mode */
if (!idev->cnf.forwarding)
goto out;
/*
* Don't update NCE if src = ::;
* this implies that the source node has no ip address assigned yet.
*/
if (ipv6_addr_any(saddr))
goto out;
/* Parse ND options */
if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
skb->dev);
if (!lladdr)
goto out;
}
neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
if (neigh) {
ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
}
out:
return;
}
static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
{
struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
struct sk_buff *skb;
struct nlmsghdr *nlh;
struct nduseroptmsg *ndmsg;
struct net *net = dev_net(ra->dev);
int err;
int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
+ (opt->nd_opt_len << 3));
size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
skb = nlmsg_new(msg_size, GFP_ATOMIC);
if (!skb) {
err = -ENOBUFS;
goto errout;
}
nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
if (!nlh) {
goto nla_put_failure;
}
ndmsg = nlmsg_data(nlh);
ndmsg->nduseropt_family = AF_INET6;
ndmsg->nduseropt_ifindex = ra->dev->ifindex;
ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
goto nla_put_failure;
nlmsg_end(skb, nlh);
rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
return;
nla_put_failure:
nlmsg_free(skb);
err = -EMSGSIZE;
errout:
rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
}
static void ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct fib6_info *rt = NULL;
u32 defrtr_usr_metric;
struct net *net;
int lifetime;
struct ndisc_options ndopts;
int optlen;
unsigned int pref = 0;
__u32 old_if_flags;
bool send_ifinfo_notify = false;
__u8 *opt = (__u8 *)(ra_msg + 1);
optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
sizeof(struct ra_msg);
ND_PRINTK(2, info,
"RA: %s, dev: %s\n",
__func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
return;
}
if (optlen < 0) {
ND_PRINTK(2, warn, "RA: packet too short\n");
return;
}
#ifdef CONFIG_IPV6_NDISC_NODETYPE
if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
return;
}
#endif
/*
* set the RA_RECV flag in the interface
*/
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev) {
ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
skb->dev->name);
return;
}
if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
if (!ipv6_accept_ra(in6_dev)) {
ND_PRINTK(2, info,
"RA: %s, did not accept ra for dev: %s\n",
__func__, skb->dev->name);
goto skip_linkparms;
}
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
ND_PRINTK(2, info,
"RA: %s, nodetype is NODEFAULT, dev: %s\n",
__func__, skb->dev->name);
goto skip_linkparms;
}
#endif
if (in6_dev->if_flags & IF_RS_SENT) {
/*
* flag that an RA was received after an RS was sent
* out on this interface.
*/
in6_dev->if_flags |= IF_RA_RCVD;
}
/*
* Remember the managed/otherconf flags from most recently
* received RA message (RFC 2462) -- yoshfuji
*/
old_if_flags = in6_dev->if_flags;
in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
IF_RA_OTHERCONF)) |
(ra_msg->icmph.icmp6_addrconf_managed ?
IF_RA_MANAGED : 0) |
(ra_msg->icmph.icmp6_addrconf_other ?
IF_RA_OTHERCONF : 0);
if (old_if_flags != in6_dev->if_flags)
send_ifinfo_notify = true;
if (!in6_dev->cnf.accept_ra_defrtr) {
ND_PRINTK(2, info,
"RA: %s, defrtr is false for dev: %s\n",
__func__, skb->dev->name);
goto skip_defrtr;
}
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
net = dev_net(in6_dev->dev);
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
goto skip_defrtr;
}
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
if (pref == ICMPV6_ROUTER_PREF_INVALID ||
!in6_dev->cnf.accept_ra_rtr_pref)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
/* routes added from RAs do not use nexthop objects */
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
fib6_info_release(rt);
return;
}
}
/* Set default route metric as specified by user */
defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
/* delete the route if lifetime is 0 or if metric needs change */
if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
ip6_del_rt(net, rt, false);
rt = NULL;
}
ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n",
rt, lifetime, defrtr_usr_metric, skb->dev->name);
if (!rt && lifetime) {
ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
skb->dev, pref, defrtr_usr_metric);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
__func__);
return;
}
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
"RA: %s got default router without neighbour\n",
__func__);
fib6_info_release(rt);
return;
}
neigh->flags |= NTF_ROUTER;
} else if (rt) {
rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
}
if (rt)
fib6_set_expires(rt, jiffies + (HZ * lifetime));
if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
}
}
skip_defrtr:
/*
* Update Reachable Time and Retrans Timer
*/
if (in6_dev->nd_parms) {
unsigned long rtime = ntohl(ra_msg->retrans_timer);
if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
rtime = (rtime*HZ)/1000;
if (rtime < HZ/100)
rtime = HZ/100;
NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
in6_dev->tstamp = jiffies;
send_ifinfo_notify = true;
}
rtime = ntohl(ra_msg->reachable_time);
if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
rtime = (rtime*HZ)/1000;
if (rtime < HZ/10)
rtime = HZ/10;
if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) {
NEIGH_VAR_SET(in6_dev->nd_parms,
BASE_REACHABLE_TIME, rtime);
NEIGH_VAR_SET(in6_dev->nd_parms,
GC_STALETIME, 3 * rtime);
in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
in6_dev->tstamp = jiffies;
send_ifinfo_notify = true;
}
}
}
/*
* Send a notify if RA changed managed/otherconf flags or timer settings
*/
if (send_ifinfo_notify)
inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
skip_linkparms:
/*
* Process options.
*/
if (!neigh)
neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
skb->dev, 1);
if (neigh) {
u8 *lladdr = NULL;
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
skb->dev);
if (!lladdr) {
ND_PRINTK(2, warn,
"RA: invalid link-layer address length\n");
goto out;
}
}
ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
NEIGH_UPDATE_F_ISROUTER,
NDISC_ROUTER_ADVERTISEMENT, &ndopts);
}
if (!ipv6_accept_ra(in6_dev)) {
ND_PRINTK(2, info,
"RA: %s, accept_ra is false for dev: %s\n",
__func__, skb->dev->name);
goto out;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: router info ignored.\n",
skb->dev->name);
goto skip_routeinfo;
}
if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_ri;
p;
p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
struct route_info *ri = (struct route_info *)p;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
ri->prefix_len == 0)
continue;
#endif
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
}
}
skip_routeinfo:
#endif
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific ndopts from interior routers */
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
ND_PRINTK(2, info,
"RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
__func__, skb->dev->name);
goto out;
}
#endif
if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_pi;
p;
p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
addrconf_prefix_rcv(skb->dev, (u8 *)p,
(p->nd_opt_len) << 3,
ndopts.nd_opts_src_lladdr != NULL);
}
}
if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
__be32 n;
u32 mtu;
memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
mtu = ntohl(n);
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
in6_dev->cnf.mtu6 = mtu;
fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
}
if (ndopts.nd_useropts) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_useropts;
p;
p = ndisc_next_useropt(skb->dev, p,
ndopts.nd_useropts_end)) {
ndisc_ra_useropt(skb, p);
}
}
if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
}
static void ndisc_redirect_rcv(struct sk_buff *skb)
{
u8 *hdr;
struct ndisc_options ndopts;
struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct rd_msg, opt));
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
case NDISC_NODETYPE_NODEFAULT:
ND_PRINTK(2, warn,
"Redirect: from host or unauthorized router\n");
return;
}
#endif
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn,
"Redirect: source address is not link-local\n");
return;
}
if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
return;
if (!ndopts.nd_opts_rh) {
ip6_redirect_no_header(skb, dev_net(skb->dev),
skb->dev->ifindex);
return;
}
hdr = (u8 *)ndopts.nd_opts_rh;
hdr += 8;
if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
return;
icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
}
static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
struct sk_buff *orig_skb,
int rd_len)
{
u8 *opt = skb_put(skb, rd_len);
memset(opt, 0, 8);
*(opt++) = ND_OPT_REDIRECT_HDR;
*(opt++) = (rd_len >> 3);
opt += 6;
skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
rd_len - 8);
}
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
{
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.ndisc_sk;
int optlen = 0;
struct inet_peer *peer;
struct sk_buff *buff;
struct rd_msg *msg;
struct in6_addr saddr_buf;
struct rt6_info *rt;
struct dst_entry *dst;
struct flowi6 fl6;
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
bool ret;
if (netif_is_l3_master(skb->dev)) {
dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
if (!dev)
return;
}
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
dev->name);
return;
}
if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn,
"Redirect: target address is not link-local unicast\n");
return;
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
&saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
return;
}
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
if (IS_ERR(dst))
return;
rt = (struct rt6_info *) dst;
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK(2, warn,
"Redirect: destination is not a neighbour\n");
goto release;
}
peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
if (peer)
inet_putpeer(peer);
if (!ret)
goto release;
if (dev->addr_len) {
struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
if (!neigh) {
ND_PRINTK(2, warn,
"Redirect: no neigh for target address\n");
goto release;
}
read_lock_bh(&neigh->lock);
if (neigh->nud_state & NUD_VALID) {
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
optlen += ndisc_redirect_opt_addr_space(dev, neigh,
ops_data_buf,
&ops_data);
} else
read_unlock_bh(&neigh->lock);
neigh_release(neigh);
}
rd_len = min_t(unsigned int,
IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen,
skb->len + 8);
rd_len &= ~0x7;
optlen += rd_len;
buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!buff)
goto release;
msg = skb_put(buff, sizeof(*msg));
*msg = (struct rd_msg) {
.icmph = {
.icmp6_type = NDISC_REDIRECT,
},
.target = *target,
.dest = ipv6_hdr(skb)->daddr,
};
/*
* include target_address option
*/
if (ha)
ndisc_fill_redirect_addr_option(buff, ha, ops_data);
/*
* build redirect option and copy skb over to the new packet.
*/
if (rd_len)
ndisc_fill_redirect_hdr_option(buff, skb, rd_len);
skb_dst_set(buff, dst);
ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf);
return;
release:
dst_release(dst);
}
static void pndisc_redo(struct sk_buff *skb)
{
ndisc_recv_ns(skb);
kfree_skb(skb);
}
static int ndisc_is_multicast(const void *pkey)
{
return ipv6_addr_is_multicast((struct in6_addr *)pkey);
}
static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
if (!idev)
return true;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
idev->cnf.suppress_frag_ndisc) {
net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
return true;
}
return false;
}
int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
if (ndisc_suppress_frag_ndisc(skb))
return 0;
if (skb_linearize(skb))
return 0;
msg = (struct nd_msg *)skb_transport_header(skb);
__skb_push(skb, skb->data - skb_transport_header(skb));
if (ipv6_hdr(skb)->hop_limit != 255) {
ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
ipv6_hdr(skb)->hop_limit);
return 0;
}
if (msg->icmph.icmp6_code != 0) {
ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
msg->icmph.icmp6_code);
return 0;
}
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
ndisc_recv_ns(skb);
break;
case NDISC_NEIGHBOUR_ADVERTISEMENT:
ndisc_recv_na(skb);
break;
case NDISC_ROUTER_SOLICITATION:
ndisc_recv_rs(skb);
break;
case NDISC_ROUTER_ADVERTISEMENT:
ndisc_router_discovery(skb);
break;
case NDISC_REDIRECT:
ndisc_redirect_rcv(skb);
break;
}
return 0;
}
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_change_info *change_info;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(0, net, false);
fallthrough;
case NETDEV_UP:
idev = in6_dev_get(dev);
if (!idev)
break;
if (idev->cnf.ndisc_notify ||
net->ipv6.devconf_all->ndisc_notify)
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
case NETDEV_CHANGE:
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&nd_tbl, dev);
if (!netif_carrier_ok(dev))
neigh_carrier_down(&nd_tbl, dev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
fib6_run_gc(0, net, false);
break;
case NETDEV_NOTIFY_PEERS:
ndisc_send_unsol_na(dev);
break;
default:
break;
}
return NOTIFY_DONE;
}
static struct notifier_block ndisc_netdev_notifier = {
.notifier_call = ndisc_netdev_event,
.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
};
#ifdef CONFIG_SYSCTL
static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
const char *func, const char *dev_name)
{
static char warncomm[TASK_COMM_LEN];
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
strcpy(warncomm, current->comm);
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
dev_name, ctl->procname);
warned++;
}
}
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
int ret;
if ((strcmp(ctl->procname, "retrans_time") == 0) ||
(strcmp(ctl->procname, "base_reachable_time") == 0))
ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
if (strcmp(ctl->procname, "retrans_time") == 0)
ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);
else if (strcmp(ctl->procname, "base_reachable_time") == 0)
ret = neigh_proc_dointvec_jiffies(ctl, write,
buffer, lenp, ppos);
else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
(strcmp(ctl->procname, "base_reachable_time_ms") == 0))
ret = neigh_proc_dointvec_ms_jiffies(ctl, write,
buffer, lenp, ppos);
else
ret = -1;
if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
idev->tstamp = jiffies;
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}
return ret;
}
#endif
static int __net_init ndisc_net_init(struct net *net)
{
struct ipv6_pinfo *np;
struct sock *sk;
int err;
err = inet_ctl_sock_create(&sk, PF_INET6,
SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
ND_PRINTK(0, err,
"NDISC: Failed to initialize the control socket (err %d)\n",
err);
return err;
}
net->ipv6.ndisc_sk = sk;
np = inet6_sk(sk);
np->hop_limit = 255;
/* Do not loopback ndisc messages */
np->mc_loop = 0;
return 0;
}
static void __net_exit ndisc_net_exit(struct net *net)
{
inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
}
static struct pernet_operations ndisc_net_ops = {
.init = ndisc_net_init,
.exit = ndisc_net_exit,
};
int __init ndisc_init(void)
{
int err;
err = register_pernet_subsys(&ndisc_net_ops);
if (err)
return err;
/*
* Initialize the neighbour table
*/
neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);
#ifdef CONFIG_SYSCTL
err = neigh_sysctl_register(NULL, &nd_tbl.parms,
ndisc_ifinfo_sysctl_change);
if (err)
goto out_unregister_pernet;
out:
#endif
return err;
#ifdef CONFIG_SYSCTL
out_unregister_pernet:
unregister_pernet_subsys(&ndisc_net_ops);
goto out;
#endif
}
int __init ndisc_late_init(void)
{
return register_netdevice_notifier(&ndisc_netdev_notifier);
}
void ndisc_late_cleanup(void)
{
unregister_netdevice_notifier(&ndisc_netdev_notifier);
}
void ndisc_cleanup(void)
{
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
unregister_pernet_subsys(&ndisc_net_ops);
}