mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-19 02:04:19 +08:00
b5facfdba1
ECMP (equal-cost multipath) hashes are typically computed on the packets' 5-tuple(src IP, dst IP, src port, dst port, L4 proto). For encapsulated packets, the L4 data is not readily available and ECMP hashing will often revert to (src IP, dst IP). This will lead to traffic polarization on a single ECMP path, causing congestion and waste of network capacity. In IPv6, the 20-bit flow label field is also used as part of the ECMP hash. In the lack of L4 data, the hashing will be on (src IP, dst IP, flow label). Having a non-zero flow label is thus important for proper traffic load balancing when L4 data is unavailable (i.e., when packets are encapsulated). Currently, the seg6_do_srh_encap() function extracts the original packet's flow label and set it as the outer IPv6 flow label. There are two issues with this behaviour: a) There is no guarantee that the inner flow label is set by the source. b) If the original packet is not IPv6, the flow label will be set to zero (e.g., IPv4 or L2 encap). This patch adds a function, named seg6_make_flowlabel(), that computes a flow label from a given skb. It supports IPv6, IPv4 and L2 payloads, and leverages the per namespace 'seg6_flowlabel" sysctl value. The currently support behaviours are as follows: -1 set flowlabel to zero. 0 copy flowlabel from Inner paceket in case of Inner IPv6 (Set flowlabel to 0 in case IPv4/L2) 1 Compute the flowlabel using seg6_make_flowlabel() This patch has been tested for IPv6, IPv4, and L2 traffic. Signed-off-by: Ahmed Abdelsalam <amsalam20@gmail.com> Acked-by: David Lebrun <dlebrun@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
118 lines
2.9 KiB
C
118 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* ipv6 in net namespaces
|
|
*/
|
|
|
|
#include <net/inet_frag.h>
|
|
|
|
#ifndef __NETNS_IPV6_H__
|
|
#define __NETNS_IPV6_H__
|
|
#include <net/dst_ops.h>
|
|
|
|
struct ctl_table_header;
|
|
|
|
struct netns_sysctl_ipv6 {
|
|
#ifdef CONFIG_SYSCTL
|
|
struct ctl_table_header *hdr;
|
|
struct ctl_table_header *route_hdr;
|
|
struct ctl_table_header *icmp_hdr;
|
|
struct ctl_table_header *frags_hdr;
|
|
struct ctl_table_header *xfrm6_hdr;
|
|
#endif
|
|
int bindv6only;
|
|
int flush_delay;
|
|
int ip6_rt_max_size;
|
|
int ip6_rt_gc_min_interval;
|
|
int ip6_rt_gc_timeout;
|
|
int ip6_rt_gc_interval;
|
|
int ip6_rt_gc_elasticity;
|
|
int ip6_rt_mtu_expires;
|
|
int ip6_rt_min_advmss;
|
|
int multipath_hash_policy;
|
|
int flowlabel_consistency;
|
|
int auto_flowlabels;
|
|
int icmpv6_time;
|
|
int anycast_src_echo_reply;
|
|
int ip_nonlocal_bind;
|
|
int fwmark_reflect;
|
|
int idgen_retries;
|
|
int idgen_delay;
|
|
int flowlabel_state_ranges;
|
|
int flowlabel_reflect;
|
|
int max_dst_opts_cnt;
|
|
int max_hbh_opts_cnt;
|
|
int max_dst_opts_len;
|
|
int max_hbh_opts_len;
|
|
int seg6_flowlabel;
|
|
};
|
|
|
|
struct netns_ipv6 {
|
|
struct netns_sysctl_ipv6 sysctl;
|
|
struct ipv6_devconf *devconf_all;
|
|
struct ipv6_devconf *devconf_dflt;
|
|
struct inet_peer_base *peers;
|
|
struct netns_frags frags;
|
|
#ifdef CONFIG_NETFILTER
|
|
struct xt_table *ip6table_filter;
|
|
struct xt_table *ip6table_mangle;
|
|
struct xt_table *ip6table_raw;
|
|
#ifdef CONFIG_SECURITY
|
|
struct xt_table *ip6table_security;
|
|
#endif
|
|
struct xt_table *ip6table_nat;
|
|
#endif
|
|
struct fib6_info *fib6_null_entry;
|
|
struct rt6_info *ip6_null_entry;
|
|
struct rt6_statistics *rt6_stats;
|
|
struct timer_list ip6_fib_timer;
|
|
struct hlist_head *fib_table_hash;
|
|
struct fib6_table *fib6_main_tbl;
|
|
struct list_head fib6_walkers;
|
|
struct dst_ops ip6_dst_ops;
|
|
rwlock_t fib6_walker_lock;
|
|
spinlock_t fib6_gc_lock;
|
|
unsigned int ip6_rt_gc_expire;
|
|
unsigned long ip6_rt_last_gc;
|
|
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
|
|
unsigned int fib6_rules_require_fldissect;
|
|
bool fib6_has_custom_rules;
|
|
struct rt6_info *ip6_prohibit_entry;
|
|
struct rt6_info *ip6_blk_hole_entry;
|
|
struct fib6_table *fib6_local_tbl;
|
|
struct fib_rules_ops *fib6_rules_ops;
|
|
#endif
|
|
struct sock **icmp_sk;
|
|
struct sock *ndisc_sk;
|
|
struct sock *tcp_sk;
|
|
struct sock *igmp_sk;
|
|
struct sock *mc_autojoin_sk;
|
|
#ifdef CONFIG_IPV6_MROUTE
|
|
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
|
|
struct mr_table *mrt6;
|
|
#else
|
|
struct list_head mr6_tables;
|
|
struct fib_rules_ops *mr6_rules_ops;
|
|
#endif
|
|
#endif
|
|
atomic_t dev_addr_genid;
|
|
atomic_t fib6_sernum;
|
|
struct seg6_pernet_data *seg6_data;
|
|
struct fib_notifier_ops *notifier_ops;
|
|
struct fib_notifier_ops *ip6mr_notifier_ops;
|
|
unsigned int ipmr_seq; /* protected by rtnl_mutex */
|
|
struct {
|
|
struct hlist_head head;
|
|
spinlock_t lock;
|
|
u32 seq;
|
|
} ip6addrlbl_table;
|
|
};
|
|
|
|
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
|
|
struct netns_nf_frag {
|
|
struct netns_sysctl_ipv6 sysctl;
|
|
struct netns_frags frags;
|
|
};
|
|
#endif
|
|
|
|
#endif
|