linux/include/net/bond_alb.h

170 lines
6.1 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
*/
#ifndef _NET_BOND_ALB_H
#define _NET_BOND_ALB_H
#include <linux/if_ether.h>
struct bonding;
struct slave;
/* Shorthand for the alb_info member of a bond and the tlb_info member
 * of a slave.
 */
#define BOND_ALB_INFO(bond)	((bond)->alb_info)
#define SLAVE_TLB_INFO(slave)	((slave)->tlb_info)

#define ALB_TIMER_TICKS_PER_SEC	10	/* should be a divisor of HZ */

/* In seconds, periodic re-balancing.
 * Used for division - never set to zero !!!
 */
#define BOND_TLB_REBALANCE_INTERVAL	10

#define BOND_ALB_DEFAULT_LP_INTERVAL	1

/* In seconds, periodic send of learning packets to the switch.
 * The argument is parenthesized, as in BOND_ALB_INFO(), so that any
 * pointer-valued expression (e.g. "p + 1") expands correctly.
 */
#define BOND_ALB_LP_INTERVAL(bond)	((bond)->params.lp_interval)

/* The two intervals above converted to ALB timer ticks. */
#define BOND_TLB_REBALANCE_TICKS	(BOND_TLB_REBALANCE_INTERVAL \
					 * ALB_TIMER_TICKS_PER_SEC)
#define BOND_ALB_LP_TICKS(bond)		(BOND_ALB_LP_INTERVAL(bond) \
					 * ALB_TIMER_TICKS_PER_SEC)

/* The size of the clients hash table.
 * Note that this value MUST NOT be smaller because the key hash table
 * is BYTE wide !
 */
#define TLB_HASH_TABLE_SIZE	256

#define TLB_NULL_INDEX		0xffffffff

/* rlb defs */
#define RLB_HASH_TABLE_SIZE	256
#define RLB_NULL_INDEX		0xffffffff
#define RLB_UPDATE_DELAY	(2*ALB_TIMER_TICKS_PER_SEC) /* 2 seconds */
#define RLB_ARP_BURST_SIZE	2

/* 3-ticks - must be smaller than the rlb rebalance interval (5 min). */
#define RLB_UPDATE_RETRY	3

/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
 * promiscuous after failover
 */
#define RLB_PROMISC_TIMEOUT	(10*ALB_TIMER_TICKS_PER_SEC)
/* One entry of the transmit (tlb) clients hash table. The next and prev
 * members are hash table entry indices, not pointers.
 */
struct tlb_client_info {
struct slave *tx_slave; /* A pointer to the slave used for transmitting
 * packets to a Client that the Hash function
 * gave this entry index.
 */
u32 tx_bytes; /* Each Client accumulates the BytesTx that
 * were transmitted to it, and after each
 * CallBack the LoadHistory is divided
 * by the balance interval
 */
u32 load_history; /* This field contains the amount of Bytes
 * that were transmitted to this client by
 * the server on the previous balance
 * interval in Bps.
 */
u32 next; /* The next Hash table entry index, assigned
 * to use the same adapter for transmit.
 */
u32 prev; /* The previous Hash table entry index,
 * assigned to use the same adapter for
 * transmit.
 */
};
/* -------------------------------------------------------------------------
* struct rlb_client_info contains all info related to a specific rx client
bonding: delete migrated IP addresses from the rlb hash table Bonding in balance-alb mode records information from ARP packets passing through the bond in a hash table (rx_hashtbl). At certain situations (e.g. link change of a slave), rlb_update_rx_clients() will send out ARP packets to update ARP caches of other hosts on the network to achieve RX load balancing. The problem is that once an IP address is recorded in the hash table, it stays there indefinitely. If this IP address is migrated to a different host in the network, bonding still sends out ARP packets that poison other systems' ARP caches with invalid information. This patch solves this by looking at all incoming ARP packets, and checking if the source IP address is one of the source addresses stored in the rx_hashtbl. If it is, but the MAC addresses differ, the corresponding hash table entries are removed. Thus, when an IP address is migrated, the first ARP broadcast by its new owner will purge the offending entries of rx_hashtbl. The hash table is hashed by ip_dst. To be able to do the above check efficiently (not walking the whole hash table), we need a reverse mapping (by ip_src). I added three new members in struct rlb_client_info: rx_hashtbl[x].src_first will point to the start of a list of entries for which hash(ip_src) == x. The list is linked with src_next and src_prev. When an incoming ARP packet arrives at rlb_arp_recv() rlb_purge_src_ip() can quickly walk only the entries on the corresponding lists, i.e. the entries that are likely to contain the offending IP address. To avoid confusion, I renamed these existing fields of struct rlb_client_info: next -> used_next prev -> used_prev rx_hashtbl_head -> rx_hashtbl_used_head (The current linked list is _not_ a list of hash table entries with colliding ip_dst. It's a list of entries that are being used; its purpose is to avoid walking the whole hash table when looking for used entries.) 
Signed-off-by: Jiri Bohac <jbohac@suse.cz> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-28 12:42:14 +08:00
* connection. This is the Clients Hash Table entry struct.
* Note that this is not a proper hash table; if a new client's IP address
* hash collides with an existing client entry, the old entry is replaced.
*
* There is a linked list (linked by the used_next and used_prev members)
* linking all the used entries of the hash table. This allows updating
* all the clients without walking over all the unused elements of the table.
*
* There are also linked lists of entries with identical hash(ip_src). These
* allow cleaning up the table from ip_src<->mac_src associations that have
* become outdated and would cause sending out invalid ARP updates to the
* network. These are linked by the (src_next and src_prev members).
* -------------------------------------------------------------------------
*/
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
bonding: delete migrated IP addresses from the rlb hash table Bonding in balance-alb mode records information from ARP packets passing through the bond in a hash table (rx_hashtbl). At certain situations (e.g. link change of a slave), rlb_update_rx_clients() will send out ARP packets to update ARP caches of other hosts on the network to achieve RX load balancing. The problem is that once an IP address is recorded in the hash table, it stays there indefinitely. If this IP address is migrated to a different host in the network, bonding still sends out ARP packets that poison other systems' ARP caches with invalid information. This patch solves this by looking at all incoming ARP packets, and checking if the source IP address is one of the source addresses stored in the rx_hashtbl. If it is, but the MAC addresses differ, the corresponding hash table entries are removed. Thus, when an IP address is migrated, the first ARP broadcast by its new owner will purge the offending entries of rx_hashtbl. The hash table is hashed by ip_dst. To be able to do the above check efficiently (not walking the whole hash table), we need a reverse mapping (by ip_src). I added three new members in struct rlb_client_info: rx_hashtbl[x].src_first will point to the start of a list of entries for which hash(ip_src) == x. The list is linked with src_next and src_prev. When an incoming ARP packet arrives at rlb_arp_recv() rlb_purge_src_ip() can quickly walk only the entries on the corresponding lists, i.e. the entries that are likely to contain the offending IP address. To avoid confusion, I renamed these existing fields of struct rlb_client_info: next -> used_next prev -> used_prev rx_hashtbl_head -> rx_hashtbl_used_head (The current linked list is _not_ a list of hash table entries with colliding ip_dst. It's a list of entries that are being used; its purpose is to avoid walking the whole hash table when looking for used entries.) 
Signed-off-by: Jiri Bohac <jbohac@suse.cz> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-28 12:42:14 +08:00
u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
bonding: delete migrated IP addresses from the rlb hash table Bonding in balance-alb mode records information from ARP packets passing through the bond in a hash table (rx_hashtbl). At certain situations (e.g. link change of a slave), rlb_update_rx_clients() will send out ARP packets to update ARP caches of other hosts on the network to achieve RX load balancing. The problem is that once an IP address is recorded in the hash table, it stays there indefinitely. If this IP address is migrated to a different host in the network, bonding still sends out ARP packets that poison other systems' ARP caches with invalid information. This patch solves this by looking at all incoming ARP packets, and checking if the source IP address is one of the source addresses stored in the rx_hashtbl. If it is, but the MAC addresses differ, the corresponding hash table entries are removed. Thus, when an IP address is migrated, the first ARP broadcast by its new owner will purge the offending entries of rx_hashtbl. The hash table is hashed by ip_dst. To be able to do the above check efficiently (not walking the whole hash table), we need a reverse mapping (by ip_src). I added three new members in struct rlb_client_info: rx_hashtbl[x].src_first will point to the start of a list of entries for which hash(ip_src) == x. The list is linked with src_next and src_prev. When an incoming ARP packet arrives at rlb_arp_recv() rlb_purge_src_ip() can quickly walk only the entries on the corresponding lists, i.e. the entries that are likely to contain the offending IP address. To avoid confusion, I renamed these existing fields of struct rlb_client_info: next -> used_next prev -> used_prev rx_hashtbl_head -> rx_hashtbl_used_head (The current linked list is _not_ a list of hash table entries with colliding ip_dst. It's a list of entries that are being used; its purpose is to avoid walking the whole hash table when looking for used entries.) 
Signed-off-by: Jiri Bohac <jbohac@suse.cz> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-28 12:42:14 +08:00
/* list of used hash table entries, starting at rx_hashtbl_used_head */
u32 used_next;
u32 used_prev;
/* ip_src based hashing */
u32 src_next; /* next entry with same hash(ip_src) */
u32 src_prev; /* prev entry with same hash(ip_src) */
u32 src_first; /* first entry with hash(ip_src) == this entry's index */
u8 assigned; /* checking whether this entry is assigned */
u8 ntt; /* flag - need to transmit client info */
struct slave *slave; /* the slave assigned to this client */
unsigned short vlan_id; /* VLAN tag associated with IP address */
};
/* Transmit load balancing (tlb) bookkeeping kept for one slave. */
struct tlb_slave_info {
u32 head; /* Index to the head of the bi-directional clients
 * hash table entries list. The entries in the list
 * are the entries that were assigned to use this
 * slave for transmit.
 */
u32 load; /* Each slave sums the loadHistory of all clients
 * assigned to it
 */
};
struct alb_bond_info {
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
u32 unbalanced_load;
bonding: make tx_rebalance_counter an atomic KCSAN reported a data-race [1] around tx_rebalance_counter which can be accessed from different contexts, without the protection of a lock/mutex. [1] BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0: bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949 do_set_master net/core/rtnetlink.c:2521 [inline] __rtnl_newlink net/core/rtnetlink.c:3475 [inline] rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1: bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00000001 -> 0x00000064 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: bond1 
bond_alb_monitor Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-12-03 10:27:18 +08:00
atomic_t tx_rebalance_counter;
int lp_counter;
/* -------- rlb parameters -------- */
int rlb_enabled;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
bonding: delete migrated IP addresses from the rlb hash table Bonding in balance-alb mode records information from ARP packets passing through the bond in a hash table (rx_hashtbl). At certain situations (e.g. link change of a slave), rlb_update_rx_clients() will send out ARP packets to update ARP caches of other hosts on the network to achieve RX load balancing. The problem is that once an IP address is recorded in the hash table, it stays there indefinitely. If this IP address is migrated to a different host in the network, bonding still sends out ARP packets that poison other systems' ARP caches with invalid information. This patch solves this by looking at all incoming ARP packets, and checking if the source IP address is one of the source addresses stored in the rx_hashtbl. If it is, but the MAC addresses differ, the corresponding hash table entries are removed. Thus, when an IP address is migrated, the first ARP broadcast by its new owner will purge the offending entries of rx_hashtbl. The hash table is hashed by ip_dst. To be able to do the above check efficiently (not walking the whole hash table), we need a reverse mapping (by ip_src). I added three new members in struct rlb_client_info: rx_hashtbl[x].src_first will point to the start of a list of entries for which hash(ip_src) == x. The list is linked with src_next and src_prev. When an incoming ARP packet arrives at rlb_arp_recv() rlb_purge_src_ip() can quickly walk only the entries on the corresponding lists, i.e. the entries that are likely to contain the offending IP address. To avoid confusion, I renamed these existing fields of struct rlb_client_info: next -> used_next prev -> used_prev rx_hashtbl_head -> rx_hashtbl_used_head (The current linked list is _not_ a list of hash table entries with colliding ip_dst. It's a list of entries that are being used; its purpose is to avoid walking the whole hash table when looking for used entries.) 
Signed-off-by: Jiri Bohac <jbohac@suse.cz> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-28 12:42:14 +08:00
u32 rx_hashtbl_used_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
struct slave *rx_slave;/* last slave to xmit from */
u8 primary_is_promisc; /* boolean */
u32 rlb_promisc_timeout_counter;/* counts primary
* promiscuity time
*/
u32 rlb_update_delay_counter;
u32 rlb_update_retry_counter;/* counter of retries
* of client update
*/
u8 rlb_rebalance; /* flag - indicates that the
* rx traffic should be
* rebalanced
*/
};
/* Forward declarations: only pointers to these types are used below, so
 * the prototypes need not rely on another header to declare them.
 */
struct sk_buff;
struct net_device;
struct work_struct;

/* ALB/TLB entry points; the definitions are not part of this header. */
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
void bond_alb_deinitialize(struct bonding *bond);
int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave,
				 char link);
void bond_alb_handle_active_change(struct bonding *bond,
				   struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
				      struct sk_buff *skb);
struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
				      struct sk_buff *skb);
/* The parameter is named like in every other prototype here. */
void bond_alb_monitor(struct work_struct *work);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
#endif /* _NET_BOND_ALB_H */