linux/net/ipv6/sysctl_net_ipv6.c
Jakub Sitnicki 22b6722bfa ipv6: Add sysctl for per namespace flow label reflection
Reflecting IPv6 Flow Label at server nodes is useful in environments
that employ multipath routing to load balance the requests. As "IPv6
Flow Label Reflection" standard draft [1] points out - ICMPv6 PTB error
messages generated in response to a downstream packets from the server
can be routed by a load balancer back to the original server without
looking at transport headers, if the server applies the flow label
reflection. This enables the Path MTU Discovery past the ECMP router in
load-balance or anycast environments where each server node is reachable
by only one path.

Introduce a sysctl to enable flow label reflection per net namespace for
all newly created sockets. Same could be earlier achieved only per
socket by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR
socket option.

[1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01

Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-08-24 18:05:43 -07:00

249 lines
6.0 KiB
C

/*
* sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
*
* Changes:
* YOSHIFUJI Hideaki @USAGI: added icmp sysctl table.
*/
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
.data = &init_net.ipv6.sysctl.bindv6only,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "anycast_src_echo_reply",
.data = &init_net.ipv6.sysctl.anycast_src_echo_reply,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "flowlabel_consistency",
.data = &init_net.ipv6.sysctl.flowlabel_consistency,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "auto_flowlabels",
.data = &init_net.ipv6.sysctl.auto_flowlabels,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &auto_flowlabels_min,
.extra2 = &auto_flowlabels_max
},
{
.procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "idgen_retries",
.data = &init_net.ipv6.sysctl.idgen_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "idgen_delay",
.data = &init_net.ipv6.sysctl.idgen_delay,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "flowlabel_state_ranges",
.data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_nonlocal_bind",
.data = &init_net.ipv6.sysctl.ip_nonlocal_bind,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "flowlabel_reflect",
.data = &init_net.ipv6.sysctl.flowlabel_reflect,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};
static struct ctl_table ipv6_rotable[] = {
{
.procname = "mld_max_msf",
.data = &sysctl_mld_max_msf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "mld_qrv",
.data = &sysctl_mld_qrv,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one
},
#ifdef CONFIG_NETLABEL
{
.procname = "calipso_cache_enable",
.data = &calipso_cache_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "calipso_cache_bucket_size",
.data = &calipso_cache_bucketsize,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
{ }
};
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
int err;
err = -ENOMEM;
ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
GFP_KERNEL);
if (!ipv6_table)
goto out;
ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
goto out_ipv6_table;
ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
if (!ipv6_icmp_table)
goto out_ipv6_route_table;
net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table);
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
net->ipv6.sysctl.route_hdr =
register_net_sysctl(net, "net/ipv6/route", ipv6_route_table);
if (!net->ipv6.sysctl.route_hdr)
goto out_unregister_ipv6_table;
net->ipv6.sysctl.icmp_hdr =
register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table);
if (!net->ipv6.sysctl.icmp_hdr)
goto out_unregister_route_table;
err = 0;
out:
return err;
out_unregister_route_table:
unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
out_unregister_ipv6_table:
unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
out_ipv6_icmp_table:
kfree(ipv6_icmp_table);
out_ipv6_route_table:
kfree(ipv6_route_table);
out_ipv6_table:
kfree(ipv6_table);
goto out;
}
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr);
unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
kfree(ipv6_table);
kfree(ipv6_route_table);
kfree(ipv6_icmp_table);
}
static struct pernet_operations ipv6_sysctl_net_ops = {
.init = ipv6_sysctl_net_init,
.exit = ipv6_sysctl_net_exit,
};
static struct ctl_table_header *ip6_header;
int ipv6_sysctl_register(void)
{
int err = -ENOMEM;
ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
if (!ip6_header)
goto out;
err = register_pernet_subsys(&ipv6_sysctl_net_ops);
if (err)
goto err_pernet;
out:
return err;
err_pernet:
unregister_net_sysctl_table(ip6_header);
goto out;
}
void ipv6_sysctl_unregister(void)
{
unregister_net_sysctl_table(ip6_header);
unregister_pernet_subsys(&ipv6_sysctl_net_ops);
}