Merge branch 'tcp-introduce-optional-per-netns-ehash'
Kuniyuki Iwashima says:

====================
tcp: Introduce optional per-netns ehash.

The more sockets we have in the hash table, the longer we spend looking
up the socket.  While running a number of small workloads on the same
host, they penalise each other and cause performance degradation.

The root cause might be a single workload that consumes much more
resources than the others.  It often happens on a cloud service where
different workloads share the same computing resource.

On EC2 c5.24xlarge instance (196 GiB memory and 524288 (1Mi / 2) ehash
entries), after running iperf3 in different netns, creating 24Mi sockets
without data transfer in the root netns causes about 10% performance
regression for the iperf3's connection.

  thash_entries    sockets    length    Gbps
         524288          1         1    50.7
                      24Mi        48    45.1

It is basically related to the length of the list of each hash bucket.
For testing purposes to see how performance drops along the length,
I set 131072 (1Mi / 8) to thash_entries, and here's the result.

  thash_entries    sockets    length    Gbps
         131072          1         1    50.7
                       1Mi         8    49.9
                       2Mi        16    48.9
                       4Mi        32    47.3
                       8Mi        64    44.6
                      16Mi       128    40.6
                      24Mi       192    36.3
                      32Mi       256    32.5
                      40Mi       320    27.0
                      48Mi       384    25.0

To resolve the socket lookup degradation, we introduce an optional
per-netns hash table for TCP, but it's just ehash, and we still share
the global bhash, bhash2 and lhash2.

With a smaller ehash, we can look up non-listener sockets faster and
isolate such noisy neighbours.  Also, we can reduce lock contention.

For details, please see the last patch.

  patch 1 - 4: prep for per-netns ehash
  patch     5: small optimisation for netns dismantle without TIME_WAIT sockets
  patch     6: add per-netns ehash

Many thanks to Eric Dumazet for reviewing and advising.
====================

Link: https://lore.kernel.org/r/20220908011022.45342-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 4fa37e4911
@@ -1040,6 +1040,35 @@ tcp_challenge_ack_limit - INTEGER
 	TCP stack implements per TCP socket limits anyway.

 	Default: INT_MAX (unlimited)

+tcp_ehash_entries - INTEGER
+	Show the number of hash buckets for TCP sockets in the current
+	networking namespace.
+
+	A negative value means the networking namespace does not own its
+	hash buckets and shares the initial networking namespace's one.
+
+tcp_child_ehash_entries - INTEGER
+	Control the number of hash buckets for TCP sockets in the child
+	networking namespace, which must be set before clone() or unshare().
+
+	If the value is not 0, the kernel uses a value rounded up to 2^n
+	as the actual hash bucket size.  0 is a special value, meaning
+	the child networking namespace will share the initial networking
+	namespace's hash buckets.
+
+	Note that the child will use the global one in case the kernel
+	fails to allocate enough memory.  In addition, the global hash
+	buckets are spread over available NUMA nodes, but the allocation
+	of the child hash table depends on the current process's NUMA
+	policy, which could result in performance differences.
+
+	Note also that the default value of tcp_max_tw_buckets and
+	tcp_max_syn_backlog depend on the hash bucket size.
+
+	Possible values: 0, 2^n (n: 0 - 24 (16Mi))
+
+	Default: 0
+
 UDP variables
 =============
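To make the two sysctls documented above concrete, here is a minimal userspace sketch. It is not part of the patch set; the value 131072 is only an illustration and the error handling is trimmed. It opts a child network namespace into a private ehash and reads the result back:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	int fd;

	/* Must be set in the parent netns before unshare()/clone(). */
	fd = open("/proc/sys/net/ipv4/tcp_child_ehash_entries", O_WRONLY);
	if (fd < 0 || write(fd, "131072", 6) != 6)
		return 1;
	close(fd);

	/* Create a new network namespace; requires CAP_SYS_ADMIN. */
	if (unshare(CLONE_NEWNET))
		return 1;

	/* Reads back the (rounded-up) per-netns ehash size; a negative
	 * value would mean this netns shares the global ehash.
	 */
	fd = open("/proc/sys/net/ipv4/tcp_ehash_entries", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("tcp_ehash_entries: %s", buf);
	}
	close(fd);
	return 0;
}

The write must happen in the parent namespace because, per the documentation above, the kernel samples the creating process's netns when the child namespace is set up.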
@@ -1069,8 +1069,7 @@ static void chtls_pass_accept_rpl(struct sk_buff *skb,
 	cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
 }
 
-static void inet_inherit_port(struct inet_hashinfo *hash_info,
-			      struct sock *lsk, struct sock *newsk)
+static void inet_inherit_port(struct sock *lsk, struct sock *newsk)
 {
 	local_bh_disable();
 	__inet_inherit_port(lsk, newsk);
@@ -1240,7 +1239,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 					   ipv4.sysctl_tcp_window_scaling),
 			   tp->window_clamp);
 	neigh_release(n);
-	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
+	inet_inherit_port(lsk, newsk);
 	csk_set_flag(csk, CSK_CONN_INLINE);
 	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
@@ -461,6 +461,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
 {
 	struct ethhdr *eth = (struct ethhdr *)(skb->data);
 	struct net_device *netdev = rq->netdev;
+	struct net *net = dev_net(netdev);
 	struct sock *sk = NULL;
 	unsigned int datalen;
 	struct iphdr *iph;
@@ -475,7 +476,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
 		depth += sizeof(struct iphdr);
 		th = (void *)iph + sizeof(struct iphdr);
 
-		sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+		sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 					     iph->saddr, th->source, iph->daddr,
 					     th->dest, netdev->ifindex);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -485,7 +486,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
 		depth += sizeof(struct ipv6hdr);
 		th = (void *)ipv6h + sizeof(struct ipv6hdr);
 
-		sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+		sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 						&ipv6h->saddr, th->source,
 						&ipv6h->daddr, ntohs(th->dest),
 						netdev->ifindex, 0);
@@ -474,6 +474,7 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev,
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 	struct nfp_net_tls_offload_ctx *ntls;
+	struct net *net = dev_net(netdev);
 	struct ipv6hdr *ipv6h;
 	struct tcphdr *th;
 	struct iphdr *iph;
@@ -494,13 +495,13 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev,
 
 	switch (ipv6h->version) {
 	case 4:
-		sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+		sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 					     iph->saddr, th->source, iph->daddr,
 					     th->dest, netdev->ifindex);
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case 6:
-		sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+		sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 						&ipv6h->saddr, th->source,
 						&ipv6h->daddr, ntohs(th->dest),
 						netdev->ifindex, 0);
@@ -168,8 +168,20 @@ struct inet_hashinfo {
 	/* The 2nd listener table hashed by local port and address */
 	unsigned int			lhash2_mask;
 	struct inet_listen_hashbucket	*lhash2;
+
+	bool				pernet;
 };
 
+static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IP_DCCP)
+	return sk->sk_prot->h.hashinfo ? :
+		sock_net(sk)->ipv4.tcp_death_row.hashinfo;
+#else
+	return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
+#endif
+}
+
 static inline struct inet_listen_hashbucket *
 inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
 {
@@ -204,6 +216,10 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
 	hashinfo->ehash_locks = NULL;
 }
 
+struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
+						 unsigned int ehash_entries);
+void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo);
+
 struct inet_bind_bucket *
 inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 			struct inet_bind_hashbucket *head,
@@ -34,6 +34,7 @@ struct inet_hashinfo;
 struct inet_timewait_death_row {
 	refcount_t		tw_refcount;
 
 	/* Padding to avoid false sharing, tw_refcount can be often written */
+	struct inet_hashinfo	*hashinfo ____cacheline_aligned_in_smp;
 	int			sysctl_max_tw_buckets;
 };
@@ -41,7 +42,7 @@ struct inet_timewait_death_row {
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
-	struct inet_timewait_death_row *tcp_death_row;
+	struct inet_timewait_death_row tcp_death_row;
 
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*forw_hdr;
@@ -170,6 +171,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_pacing_ca_ratio;
 	int sysctl_tcp_wmem[3];
 	int sysctl_tcp_rmem[3];
+	unsigned int sysctl_tcp_child_ehash_entries;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	unsigned long sysctl_tcp_comp_sack_slack_ns;
 	int sysctl_max_syn_backlog;
@@ -346,6 +346,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
+void tcp_twsk_purge(struct list_head *net_exit_list, int family);
 ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
@@ -6373,6 +6373,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      int dif, int sdif, u8 family, u8 proto)
 {
+	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
 	bool refcounted = false;
 	struct sock *sk = NULL;
 
@@ -6381,7 +6382,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 		__be32 dst4 = tuple->ipv4.daddr;
 
 		if (proto == IPPROTO_TCP)
-			sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
+			sk = __inet_lookup(net, hinfo, NULL, 0,
 					   src4, tuple->ipv4.sport,
 					   dst4, tuple->ipv4.dport,
 					   dif, sdif, &refcounted);
@@ -6395,7 +6396,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
 
 		if (proto == IPPROTO_TCP)
-			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
+			sk = __inet6_lookup(net, hinfo, NULL, 0,
 					    src6, tuple->ipv6.sport,
 					    dst6, ntohs(tuple->ipv6.dport),
 					    dif, sdif, &refcounted);
@@ -1197,6 +1197,8 @@ static int __init dccp_init(void)
 		INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
 	}
 
+	dccp_hashinfo.pernet = false;
+
 	rc = dccp_mib_init();
 	if (rc)
 		goto out_free_dccp_bhash2;
@@ -1250,7 +1250,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	}
 
 	prev_addr_hashbucket =
-		inet_bhashfn_portaddr(sk->sk_prot->h.hashinfo, sk,
+		inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk,
 				      sock_net(sk), inet->inet_num);
 
 	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
@@ -134,6 +134,7 @@ static void esp_free_tcp_sk(struct rcu_head *head)
 static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
 {
 	struct xfrm_encap_tmpl *encap = x->encap;
+	struct net *net = xs_net(x);
 	struct esp_tcp_sk *esk;
 	__be16 sport, dport;
 	struct sock *nsk;
@@ -160,7 +161,7 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
 	}
 	spin_unlock_bh(&x->lock);
 
-	sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
+	sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
 				     dport, x->props.saddr.a4, sport, 0);
 	if (!sk)
 		return ERR_PTR(-ENOENT);
@@ -285,16 +285,14 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
 			struct inet_bind2_bucket **tb2_ret,
 			struct inet_bind_hashbucket **head2_ret, int *port_ret)
 {
-	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
-	int port = 0;
+	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+	int i, low, high, attempt_half, port, l3mdev;
 	struct inet_bind_hashbucket *head, *head2;
 	struct net *net = sock_net(sk);
-	bool relax = false;
-	int i, low, high, attempt_half;
 	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
 	u32 remaining, offset;
-	int l3mdev;
+	bool relax = false;
 
 	l3mdev = inet_sk_bound_l3mdev(sk);
 ports_exhausted:
@@ -469,17 +467,16 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
  */
 int inet_csk_get_port(struct sock *sk, unsigned short snum)
 {
+	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
 	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
-	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
-	int ret = 1, port = snum;
-	struct net *net = sock_net(sk);
 	bool found_port = false, check_bind_conflict = true;
 	bool bhash_created = false, bhash2_created = false;
 	struct inet_bind_hashbucket *head, *head2;
 	struct inet_bind2_bucket *tb2 = NULL;
 	struct inet_bind_bucket *tb = NULL;
 	bool head2_lock_acquired = false;
-	int l3mdev;
+	int ret = 1, port = snum, l3mdev;
+	struct net *net = sock_net(sk);
 
 	l3mdev = inet_sk_bound_l3mdev(sk);
 
@@ -909,14 +906,15 @@ static void reqsk_migrate_reset(struct request_sock *req)
 /* return true if req was found in the ehash table */
 static bool reqsk_queue_unlink(struct request_sock *req)
 {
-	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
+	struct sock *sk = req_to_sk(req);
 	bool found = false;
 
-	if (sk_hashed(req_to_sk(req))) {
+	if (sk_hashed(sk)) {
+		struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
 		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
 
 		spin_lock(lock);
-		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+		found = __sk_nulls_del_node_init_rcu(sk);
 		spin_unlock(lock);
 	}
 	if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
@@ -168,14 +168,15 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
  */
 static void __inet_put_port(struct sock *sk)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
-			hashinfo->bhash_size);
-	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
-	struct inet_bind_hashbucket *head2 =
-		inet_bhashfn_portaddr(hashinfo, sk, sock_net(sk),
-				      inet_sk(sk)->inet_num);
+	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+	struct inet_bind_hashbucket *head, *head2;
+	struct net *net = sock_net(sk);
 	struct inet_bind_bucket *tb;
+	int bhash;
+
+	bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size);
+	head = &hashinfo->bhash[bhash];
+	head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num);
 
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
@@ -207,19 +208,19 @@ EXPORT_SYMBOL(inet_put_port);
 
 int __inet_inherit_port(const struct sock *sk, struct sock *child)
 {
-	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
+	struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk);
 	unsigned short port = inet_sk(child)->inet_num;
-	const int bhash = inet_bhashfn(sock_net(sk), port,
-			table->bhash_size);
-	struct inet_bind_hashbucket *head = &table->bhash[bhash];
-	struct inet_bind_hashbucket *head2 =
-		inet_bhashfn_portaddr(table, child, sock_net(sk), port);
+	struct inet_bind_hashbucket *head, *head2;
 	bool created_inet_bind_bucket = false;
-	bool update_fastreuse = false;
 	struct net *net = sock_net(sk);
+	bool update_fastreuse = false;
 	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
-	int l3mdev;
+	int bhash, l3mdev;
+
+	bhash = inet_bhashfn(net, port, table->bhash_size);
+	head = &table->bhash[bhash];
+	head2 = inet_bhashfn_portaddr(table, child, net, port);
 
 	spin_lock(&head->lock);
 	spin_lock(&head2->lock);
@@ -385,7 +386,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net,
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
 
-	if (hashinfo != &tcp_hashinfo)
+	if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
 		return NULL; /* only TCP is supported */
 
 	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
@@ -628,9 +629,9 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
  */
 bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_nulls_head *list;
+	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
 	struct inet_ehash_bucket *head;
+	struct hlist_nulls_head *list;
 	spinlock_t *lock;
 	bool ret = true;
 
@@ -700,7 +701,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
 
 int __inet_hash(struct sock *sk, struct sock *osk)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
 	struct inet_listen_hashbucket *ilb2;
 	int err = 0;
 
@@ -746,7 +747,7 @@ EXPORT_SYMBOL_GPL(inet_hash);
 
 void inet_unhash(struct sock *sk)
 {
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
 
 	if (sk_unhashed(sk))
 		return;
@@ -833,7 +834,7 @@ inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net
 struct inet_bind_hashbucket *
 inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port)
 {
-	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
 	u32 hash;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct in6_addr addr_any = {};
@@ -849,7 +850,7 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in
 
 int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
 {
-	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
+	struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
 	struct inet_bind2_bucket *tb2, *new_tb2;
 	int l3mdev = inet_sk_bound_l3mdev(sk);
 	struct inet_bind_hashbucket *head2;
@@ -1144,3 +1145,50 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
+
+struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
+						 unsigned int ehash_entries)
+{
+	struct inet_hashinfo *new_hashinfo;
+	int i;
+
+	new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL);
+	if (!new_hashinfo)
+		goto err;
+
+	new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket),
+					   GFP_KERNEL_ACCOUNT);
+	if (!new_hashinfo->ehash)
+		goto free_hashinfo;
+
+	new_hashinfo->ehash_mask = ehash_entries - 1;
+
+	if (inet_ehash_locks_alloc(new_hashinfo))
+		goto free_ehash;
+
+	for (i = 0; i < ehash_entries; i++)
+		INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i);
+
+	new_hashinfo->pernet = true;
+
+	return new_hashinfo;
+
+free_ehash:
+	vfree(new_hashinfo->ehash);
+free_hashinfo:
+	kfree(new_hashinfo);
+err:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc);
+
+void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo)
+{
+	if (!hashinfo->pernet)
+		return;
+
+	inet_ehash_locks_free(hashinfo);
+	vfree(hashinfo->ehash);
+	kfree(hashinfo);
+}
+EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free);
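A design note on inet_pernet_hashinfo_alloc() above: kmemdup() duplicates the whole global inet_hashinfo, so the bhash, bhash2 and lhash2 pointers are inherited as-is, and only ehash plus its lock array are replaced with freshly allocated tables. That is the mechanism behind the cover letter's statement that only the established hash becomes per-netns while the bind and listener hashes stay shared.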
@@ -59,9 +59,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 	inet_twsk_bind_unhash(tw, hashinfo);
 	spin_unlock(&bhead->lock);
 
-	if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
-		kfree(tw->tw_dr);
-
+	refcount_dec(&tw->tw_dr->tw_refcount);
 	inet_twsk_put(tw);
 }
 
@@ -71,8 +71,8 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
 {
 	switch (protocol) {
 	case IPPROTO_TCP:
-		return inet_lookup(net, &tcp_hashinfo, skb, doff,
-				   saddr, sport, daddr, dport,
+		return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo,
+				   skb, doff, saddr, sport, daddr, dport,
 				   in->ifindex);
 	case IPPROTO_UDP:
 		return udp4_lib_lookup(net, saddr, sport, daddr, dport,
@@ -79,6 +79,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 		      const struct net_device *in,
 		      const enum nf_tproxy_lookup_t lookup_type)
 {
+	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
 	struct sock *sk;
 
 	switch (protocol) {
@@ -92,12 +93,10 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 
 		switch (lookup_type) {
 		case NF_TPROXY_LOOKUP_LISTENER:
-			sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
-						  ip_hdrlen(skb) +
-						  __tcp_hdrlen(hp),
-						  saddr, sport,
-						  daddr, dport,
-						  in->ifindex, 0);
+			sk = inet_lookup_listener(net, hinfo, skb,
+						  ip_hdrlen(skb) + __tcp_hdrlen(hp),
+						  saddr, sport, daddr, dport,
+						  in->ifindex, 0);
 
 			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
@@ -108,9 +107,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 			 */
 			break;
 		case NF_TPROXY_LOOKUP_ESTABLISHED:
-			sk = inet_lookup_established(net, &tcp_hashinfo,
-						     saddr, sport, daddr, dport,
-						     in->ifindex);
+			sk = inet_lookup_established(net, hinfo, saddr, sport,
+						     daddr, dport, in->ifindex);
 			break;
 		default:
 			BUG();
@@ -59,7 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
 		   sock_prot_inuse_get(net, &tcp_prot), orphans,
-		   refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
+		   refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1,
 		   sockets, proto_memory_allocated(&tcp_prot));
 	seq_printf(seq, "UDP: inuse %d mem %ld\n",
 		   sock_prot_inuse_get(net, &udp_prot),
@@ -39,6 +39,7 @@ static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
 static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
 	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -382,6 +383,29 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 	return ret;
 }
 
+static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
+				  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net *net = container_of(table->data, struct net,
+				       ipv4.sysctl_tcp_child_ehash_entries);
+	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
+	int tcp_ehash_entries;
+	struct ctl_table tbl;
+
+	tcp_ehash_entries = hinfo->ehash_mask + 1;
+
+	/* A negative number indicates that the child netns
+	 * shares the global ehash.
+	 */
+	if (!net_eq(net, &init_net) && !hinfo->pernet)
+		tcp_ehash_entries *= -1;
+
+	tbl.data = &tcp_ehash_entries;
+	tbl.maxlen = sizeof(int);
+
+	return proc_dointvec(&tbl, write, buffer, lenp, ppos);
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 					  void *buffer, size_t *lenp,
@@ -530,10 +554,9 @@ static struct ctl_table ipv4_table[] = {
 };
 
 static struct ctl_table ipv4_net_table[] = {
-	/* tcp_max_tw_buckets must be first in this table. */
 	{
 		.procname	= "tcp_max_tw_buckets",
-/*		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
+		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
@@ -1321,6 +1344,21 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "tcp_ehash_entries",
+		.data		= &init_net.ipv4.sysctl_tcp_child_ehash_entries,
+		.mode		= 0444,
+		.proc_handler	= proc_tcp_ehash_entries,
+	},
+	{
+		.procname	= "tcp_child_ehash_entries",
+		.data		= &init_net.ipv4.sysctl_tcp_child_ehash_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &tcp_child_ehash_entries_max,
+	},
 	{
 		.procname	= "udp_rmem_min",
 		.data		= &init_net.ipv4.sysctl_udp_rmem_min,
@@ -1361,8 +1399,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 	if (!table)
 		goto err_alloc;
 
-	/* skip first entry (sysctl_max_tw_buckets) */
-	for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+	for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
 		if (table[i].data) {
 			/* Update the variables to point into
 			 * the current struct net
@@ -1377,8 +1414,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 		}
 	}
 
-	table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
-
 	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (!net->ipv4.ipv4_hdr)
 		goto err_reg;
@@ -4790,6 +4790,7 @@ void __init tcp_init(void)
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
 	}
 
+	tcp_hashinfo.pernet = false;
 
 	cnt = tcp_hashinfo.ehash_mask + 1;
 	sysctl_tcp_max_orphans = cnt / 2;
@@ -181,13 +181,21 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			  const struct inet_diag_req_v2 *r)
 {
-	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r);
+	struct inet_hashinfo *hinfo;
+
+	hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+
+	inet_diag_dump_icsk(hinfo, skb, cb, r);
 }
 
 static int tcp_diag_dump_one(struct netlink_callback *cb,
 			     const struct inet_diag_req_v2 *req)
 {
-	return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req);
+	struct inet_hashinfo *hinfo;
+
+	hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+
+	return inet_diag_dump_one_icsk(hinfo, cb, req);
 }
 
 #ifdef CONFIG_INET_DIAG_DESTROY
@@ -195,9 +203,13 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
 			    const struct inet_diag_req_v2 *req)
 {
 	struct net *net = sock_net(in_skb->sk);
-	struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+	struct inet_hashinfo *hinfo;
+	struct sock *sk;
 	int err;
 
+	hinfo = net->ipv4.tcp_death_row.hashinfo;
+	sk = inet_diag_find_one_icsk(net, hinfo, req);
+
 	if (IS_ERR(sk))
 		return PTR_ERR(sk);
 
@@ -201,15 +201,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	struct inet_timewait_death_row *tcp_death_row;
 	__be32 daddr, nexthop, prev_sk_rcv_saddr;
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ip_options_rcu *inet_opt;
+	struct net *net = sock_net(sk);
 	__be16 orig_sport, orig_dport;
 	struct flowi4 *fl4;
 	struct rtable *rt;
 	int err;
-	struct ip_options_rcu *inet_opt;
-	struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
@@ -235,7 +236,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
-			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+			IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 		return err;
 	}
 
@@ -247,11 +248,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (!inet_opt || !inet_opt->opt.srr)
 		daddr = fl4->daddr;
 
+	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+
 	if (!inet->inet_saddr) {
 		if (inet_csk(sk)->icsk_bind2_hash) {
-			prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
-								     sk, sock_net(sk),
-								     inet->inet_num);
+			prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
+								     sk, net, inet->inet_num);
 			prev_sk_rcv_saddr = sk->sk_rcv_saddr;
 		}
 		inet->inet_saddr = fl4->saddr;
@@ -317,8 +319,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 						  inet->inet_daddr,
 						  inet->inet_sport,
 						  usin->sin_port));
-		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
-						 inet->inet_saddr,
+		tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
 						 inet->inet_daddr);
 	}
 
@@ -494,9 +495,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 	int err;
 	struct net *net = dev_net(skb->dev);
 
-	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
-				       th->dest, iph->saddr, ntohs(th->source),
-				       inet_iif(skb), 0);
+	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+				       iph->daddr, th->dest, iph->saddr,
+				       ntohs(th->source), inet_iif(skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return -ENOENT;
@@ -759,8 +760,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	 * Incoming packet is checked with md5 hash with finding key,
 	 * no RST generated if md5 hash doesn't match.
 	 */
-	sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
-				     ip_hdr(skb)->saddr,
+	sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
+				     NULL, 0, ip_hdr(skb)->saddr,
 				     th->source, ip_hdr(skb)->daddr,
 				     ntohs(th->source), dif, sdif);
 	/* don't send rst if it can't find key */
@@ -1728,6 +1729,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
 
 int tcp_v4_early_demux(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	struct sock *sk;
@@ -1744,7 +1746,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 	if (th->doff < sizeof(struct tcphdr) / 4)
 		return 0;
 
-	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 				       iph->saddr, th->source,
 				       iph->daddr, ntohs(th->dest),
 				       skb->skb_iif, inet_sdif(skb));
@@ -1970,7 +1972,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	th = (const struct tcphdr *)skb->data;
 	iph = ip_hdr(skb);
 lookup:
-	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+	sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo,
+			       skb, __tcp_hdrlen(th), th->source,
 			       th->dest, sdif, &refcounted);
 	if (!sk)
 		goto no_tcp_socket;
@@ -2152,9 +2155,9 @@ do_time_wait:
 	}
 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
 	case TCP_TW_SYN: {
-		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
-							&tcp_hashinfo, skb,
-							__tcp_hdrlen(th),
+		struct sock *sk2 = inet_lookup_listener(net,
+							net->ipv4.tcp_death_row.hashinfo,
+							skb, __tcp_hdrlen(th),
 							iph->saddr, th->source,
 							iph->daddr, th->dest,
 							inet_iif(skb),
@@ -2304,15 +2307,16 @@ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
  */
 static void *listening_get_first(struct seq_file *seq)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct tcp_iter_state *st = seq->private;
 
 	st->offset = 0;
-	for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+	for (; st->bucket <= hinfo->lhash2_mask; st->bucket++) {
 		struct inet_listen_hashbucket *ilb2;
 		struct hlist_nulls_node *node;
 		struct sock *sk;
 
-		ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+		ilb2 = &hinfo->lhash2[st->bucket];
 		if (hlist_nulls_empty(&ilb2->nulls_head))
 			continue;
 
@@ -2337,6 +2341,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct tcp_iter_state *st = seq->private;
 	struct inet_listen_hashbucket *ilb2;
 	struct hlist_nulls_node *node;
+	struct inet_hashinfo *hinfo;
 	struct sock *sk = cur;
 
 	++st->num;
@@ -2348,7 +2353,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 		return sk;
 	}
 
-	ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+	hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
+	ilb2 = &hinfo->lhash2[st->bucket];
 	spin_unlock(&ilb2->lock);
 	++st->bucket;
 	return listening_get_first(seq);
@@ -2370,9 +2376,10 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 	return rc;
 }
 
-static inline bool empty_bucket(const struct tcp_iter_state *st)
+static inline bool empty_bucket(struct inet_hashinfo *hinfo,
+				const struct tcp_iter_state *st)
 {
-	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
+	return hlist_nulls_empty(&hinfo->ehash[st->bucket].chain);
 }
 
 /*
@@ -2381,20 +2388,21 @@
  */
 static void *established_get_first(struct seq_file *seq)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct tcp_iter_state *st = seq->private;
 
 	st->offset = 0;
-	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+	for (; st->bucket <= hinfo->ehash_mask; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
-		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+		spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
 
 		/* Lockless fast path for the common case of empty buckets */
-		if (empty_bucket(st))
+		if (empty_bucket(hinfo, st))
 			continue;
 
 		spin_lock_bh(lock);
-		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+		sk_nulls_for_each(sk, node, &hinfo->ehash[st->bucket].chain) {
 			if (seq_sk_match(seq, sk))
 				return sk;
 		}
@@ -2406,9 +2414,10 @@ static void *established_get_first(struct seq_file *seq)
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-	struct sock *sk = cur;
-	struct hlist_nulls_node *node;
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct tcp_iter_state *st = seq->private;
+	struct hlist_nulls_node *node;
+	struct sock *sk = cur;
 
 	++st->num;
 	++st->offset;
@@ -2420,7 +2429,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 		return sk;
 	}
 
-	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+	spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
 	++st->bucket;
 	return established_get_first(seq);
 }
@@ -2458,6 +2467,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 
 static void *tcp_seek_last_pos(struct seq_file *seq)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct tcp_iter_state *st = seq->private;
 	int bucket = st->bucket;
 	int offset = st->offset;
@@ -2466,7 +2476,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
 
 	switch (st->state) {
 	case TCP_SEQ_STATE_LISTENING:
-		if (st->bucket > tcp_hashinfo.lhash2_mask)
+		if (st->bucket > hinfo->lhash2_mask)
 			break;
 		st->state = TCP_SEQ_STATE_LISTENING;
 		rc = listening_get_first(seq);
@@ -2478,7 +2488,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 		fallthrough;
 	case TCP_SEQ_STATE_ESTABLISHED:
-		if (st->bucket > tcp_hashinfo.ehash_mask)
+		if (st->bucket > hinfo->ehash_mask)
 			break;
 		rc = established_get_first(seq);
 		while (offset-- && rc && bucket == st->bucket)
@@ -2546,16 +2556,17 @@ EXPORT_SYMBOL(tcp_seq_next);
 
 void tcp_seq_stop(struct seq_file *seq, void *v)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct tcp_iter_state *st = seq->private;
 
 	switch (st->state) {
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
-			spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+			spin_unlock(&hinfo->lhash2[st->bucket].lock);
 		break;
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+			spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
 		break;
 	}
 }
@@ -2750,6 +2761,7 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
 static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
 						 struct sock *start_sk)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct bpf_tcp_iter_state *iter = seq->private;
 	struct tcp_iter_state *st = &iter->state;
 	struct hlist_nulls_node *node;
@@ -2769,7 +2781,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
 			expected++;
 		}
 	}
-	spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+	spin_unlock(&hinfo->lhash2[st->bucket].lock);
 
 	return expected;
 }
@@ -2777,6 +2789,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
 static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
 						   struct sock *start_sk)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct bpf_tcp_iter_state *iter = seq->private;
 	struct tcp_iter_state *st = &iter->state;
 	struct hlist_nulls_node *node;
@@ -2796,13 +2809,14 @@ static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
 			expected++;
 		}
 	}
-	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+	spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
 
 	return expected;
 }
 
 static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
 {
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct bpf_tcp_iter_state *iter = seq->private;
 	struct tcp_iter_state *st = &iter->state;
 	unsigned int expected;
@@ -2818,7 +2832,7 @@ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
 		st->offset = 0;
 		st->bucket++;
 		if (st->state == TCP_SEQ_STATE_LISTENING &&
-		    st->bucket > tcp_hashinfo.lhash2_mask) {
+		    st->bucket > hinfo->lhash2_mask) {
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
 			st->bucket = 0;
 		}
@@ -3083,7 +3097,7 @@ struct proto tcp_prot = {
 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
-	.h.hashinfo		= &tcp_hashinfo,
+	.h.hashinfo		= NULL,
 	.no_autobind		= true,
 	.diag_destroy		= tcp_abort,
 };
@@ -3091,19 +3105,43 @@ EXPORT_SYMBOL(tcp_prot);
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-	struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
-
 	if (net->ipv4.tcp_congestion_control)
 		bpf_module_put(net->ipv4.tcp_congestion_control,
 			       net->ipv4.tcp_congestion_control->owner);
-	if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
-		kfree(tcp_death_row);
 }
 
+static void __net_init tcp_set_hashinfo(struct net *net)
+{
+	struct inet_hashinfo *hinfo;
+	unsigned int ehash_entries;
+	struct net *old_net;
+
+	if (net_eq(net, &init_net))
+		goto fallback;
+
+	old_net = current->nsproxy->net_ns;
+	ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries);
+	if (!ehash_entries)
+		goto fallback;
+
+	ehash_entries = roundup_pow_of_two(ehash_entries);
+	hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries);
+	if (!hinfo) {
+		pr_warn("Failed to allocate TCP ehash (entries: %u) "
+			"for a netns, fallback to the global one\n",
+			ehash_entries);
+fallback:
+		hinfo = &tcp_hashinfo;
+		ehash_entries = tcp_hashinfo.ehash_mask + 1;
+	}
+
+	net->ipv4.tcp_death_row.hashinfo = hinfo;
+	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2;
+	net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128);
+}
+
 static int __net_init tcp_sk_init(struct net *net)
 {
-	int cnt;
-
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
 
@@ -3129,15 +3167,9 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_tw_reuse = 2;
 	net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 
-	net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
-	if (!net->ipv4.tcp_death_row)
-		return -ENOMEM;
-	refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
-	cnt = tcp_hashinfo.ehash_mask + 1;
-	net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
-	net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
+	refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1);
+	tcp_set_hashinfo(net);
 
-	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
 	net->ipv4.sysctl_tcp_sack = 1;
 	net->ipv4.sysctl_tcp_window_scaling = 1;
 	net->ipv4.sysctl_tcp_timestamps = 1;
@@ -3199,10 +3231,13 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
 	struct net *net;
 
-	inet_twsk_purge(&tcp_hashinfo, AF_INET);
+	tcp_twsk_purge(net_exit_list, AF_INET);
 
-	list_for_each_entry(net, net_exit_list, exit_list)
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
+		WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
 		tcp_fastopen_ctx_destroy(net);
+	}
 }
 
 static struct pernet_operations __net_initdata tcp_sk_ops = {
@@ -247,10 +247,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
 	struct inet_timewait_sock *tw;
-	struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 
-	tw = inet_twsk_alloc(sk, tcp_death_row, state);
+	tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state);
 
 	if (tw) {
 		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -319,14 +319,14 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		/* Linkage updates.
 		 * Note that access to tw after this point is illegal.
 		 */
-		inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+		inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo);
 		local_bh_enable();
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
 		 * non-graceful socket closings.
 		 */
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
+		NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW);
 	}
 
 	tcp_update_metrics(sk);
@@ -347,6 +347,26 @@ void tcp_twsk_destructor(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_twsk_purge(struct list_head *net_exit_list, int family)
+{
+	bool purged_once = false;
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		/* The last refcount is decremented in tcp_sk_exit_batch() */
+		if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1)
+			continue;
+
+		if (net->ipv4.tcp_death_row.hashinfo->pernet) {
+			inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
+		} else if (!purged_once) {
+			inet_twsk_purge(&tcp_hashinfo, family);
+			purged_once = true;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(tcp_twsk_purge);
+
 /* Warning : This function is called without sk_listener being locked.
  * Be sure to read socket fields once, as their value could change under us.
  */
@@ -151,6 +151,7 @@ static void esp_free_tcp_sk(struct rcu_head *head)
 static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
 {
 	struct xfrm_encap_tmpl *encap = x->encap;
+	struct net *net = xs_net(x);
 	struct esp_tcp_sk *esk;
 	__be16 sport, dport;
 	struct sock *nsk;
@@ -177,7 +178,7 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
 	}
 	spin_unlock_bh(&x->lock);
 
-	sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6,
+	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
 					dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
 	if (!sk)
 		return ERR_PTR(-ENOENT);
@@ -21,8 +21,6 @@
 #include <net/ip.h>
 #include <net/sock_reuseport.h>
 
-extern struct inet_hashinfo tcp_hashinfo;
-
 u32 inet6_ehashfn(const struct net *net,
 		  const struct in6_addr *laddr, const u16 lport,
 		  const struct in6_addr *faddr, const __be16 fport)
@@ -169,7 +167,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
 
-	if (hashinfo != &tcp_hashinfo)
+	if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
 		return NULL; /* only TCP is supported */
 
 	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
@@ -83,8 +83,8 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
 {
 	switch (protocol) {
 	case IPPROTO_TCP:
-		return inet6_lookup(net, &tcp_hashinfo, skb, doff,
-				    saddr, sport, daddr, dport,
+		return inet6_lookup(net, net->ipv4.tcp_death_row.hashinfo,
+				    skb, doff, saddr, sport, daddr, dport,
 				    in->ifindex);
 	case IPPROTO_UDP:
 		return udp6_lib_lookup(net, saddr, sport, daddr, dport,
@@ -80,6 +80,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 		      const struct net_device *in,
 		      const enum nf_tproxy_lookup_t lookup_type)
 {
+	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
 	struct sock *sk;
 
 	switch (protocol) {
@@ -93,7 +94,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 
 		switch (lookup_type) {
 		case NF_TPROXY_LOOKUP_LISTENER:
-			sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+			sk = inet6_lookup_listener(net, hinfo, skb,
 						   thoff + __tcp_hdrlen(hp),
 						   saddr, sport,
 						   daddr, ntohs(dport),
@@ -108,9 +109,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 			 */
 			break;
 		case NF_TPROXY_LOOKUP_ESTABLISHED:
-			sk = __inet6_lookup_established(net, &tcp_hashinfo,
-							saddr, sport, daddr, ntohs(dport),
-							in->ifindex, 0);
+			sk = __inet6_lookup_established(net, hinfo, saddr, sport, daddr,
+							ntohs(dport), in->ifindex, 0);
 			break;
 		default:
 			BUG();
@@ -146,15 +146,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			  int addr_len)
 {
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
-	struct inet_sock *inet = inet_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct in6_addr *saddr = NULL, *final_p, final;
 	struct inet_timewait_death_row *tcp_death_row;
 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct in6_addr *saddr = NULL, *final_p, final;
+	struct net *net = sock_net(sk);
 	struct ipv6_txoptions *opt;
-	struct flowi6 fl6;
 	struct dst_entry *dst;
+	struct flowi6 fl6;
 	int addr_type;
 	int err;
@@ -280,20 +281,21 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
 
-	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
 		err = PTR_ERR(dst);
 		goto failure;
 	}
 
+	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+
 	if (!saddr) {
 		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
 		struct in6_addr prev_v6_rcv_saddr;
 
 		if (icsk->icsk_bind2_hash) {
-			prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
-								     sk, sock_net(sk),
-								     inet->inet_num);
+			prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
								     sk, net, inet->inet_num);
 			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 		}
 		saddr = &fl6.saddr;
@@ -325,7 +327,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->inet_dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
-	tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
 	err = inet6_hash_connect(tcp_death_row, sk);
 	if (err)
 		goto late_failure;
@@ -339,8 +340,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 						    sk->sk_v6_daddr.s6_addr32,
 						    inet->inet_sport,
 						    inet->inet_dport));
-		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
-						   np->saddr.s6_addr32,
+		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
 						   sk->sk_v6_daddr.s6_addr32);
 	}
 
@@ -403,7 +403,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	bool fatal;
 	int err;
 
-	sk = __inet6_lookup_established(net, &tcp_hashinfo,
+	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 					&hdr->daddr, th->dest,
 					&hdr->saddr, ntohs(th->source),
 					skb->dev->ifindex, inet6_sdif(skb));
@@ -1036,11 +1036,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 	 * Incoming packet is checked with md5 hash with finding key,
 	 * no RST generated if md5 hash doesn't match.
 	 */
-	sk1 = inet6_lookup_listener(net,
-				    &tcp_hashinfo, NULL, 0,
-				    &ipv6h->saddr,
-				    th->source, &ipv6h->daddr,
-				    ntohs(th->source), dif, sdif);
+	sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
+				    NULL, 0, &ipv6h->saddr, th->source,
+				    &ipv6h->daddr, ntohs(th->source),
+				    dif, sdif);
 	if (!sk1)
 		goto out;
 
@@ -1638,7 +1637,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 	hdr = ipv6_hdr(skb);
 
 lookup:
-	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
 				th->source, th->dest, inet6_iif(skb), sdif,
 				&refcounted);
 	if (!sk)
@@ -1813,7 +1812,7 @@ do_time_wait:
 	{
 		struct sock *sk2;
 
-		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
 					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
@@ -1846,6 +1845,7 @@ do_time_wait:
 
 void tcp_v6_early_demux(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	const struct ipv6hdr *hdr;
 	const struct tcphdr *th;
 	struct sock *sk;
@@ -1863,7 +1863,7 @@ void tcp_v6_early_demux(struct sk_buff *skb)
 		return;
 
 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
-	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
 					&hdr->saddr, th->source,
 					&hdr->daddr, ntohs(th->dest),
 					inet6_iif(skb), inet6_sdif(skb));
@@ -2195,7 +2195,7 @@ struct proto tcpv6_prot = {
 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
-	.h.hashinfo		= &tcp_hashinfo,
+	.h.hashinfo		= NULL,
 	.no_autobind		= true,
 	.diag_destroy		= tcp_abort,
 };
@@ -2229,7 +2229,7 @@ static void __net_exit tcpv6_net_exit(struct net *net)
 
 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
 {
-	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+	tcp_twsk_purge(net_exit_list, AF_INET6);
 }
 
 static struct pernet_operations tcpv6_net_ops = {
@@ -81,15 +81,18 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
 	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
 	struct nlattr *bc = cb_data->inet_diag_nla_bc;
 	struct net *net = sock_net(skb->sk);
+	struct inet_hashinfo *hinfo;
 	int i;
 
-	for (i = diag_ctx->l_slot; i <= tcp_hashinfo.lhash2_mask; i++) {
+	hinfo = net->ipv4.tcp_death_row.hashinfo;
+
+	for (i = diag_ctx->l_slot; i <= hinfo->lhash2_mask; i++) {
 		struct inet_listen_hashbucket *ilb;
 		struct hlist_nulls_node *node;
 		struct sock *sk;
 		int num = 0;
 
-		ilb = &tcp_hashinfo.lhash2[i];
+		ilb = &hinfo->lhash2[i];
 
 		rcu_read_lock();
 		spin_lock(&ilb->lock);