linux/include/net/bond_alb.h
Eric Dumazet 6dada2646a bonding: make tx_rebalance_counter an atomic
commit dac8e00fb6 upstream.

KCSAN reported a data-race [1] around tx_rebalance_counter
which can be accessed from different contexts, without
the protection of a lock/mutex.

[1]
BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor

write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0:
 bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613
 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949
 do_set_master net/core/rtnetlink.c:2521 [inline]
 __rtnl_newlink net/core/rtnetlink.c:3475 [inline]
 rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506
 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571
 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491
 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
 ___sys_sendmsg net/socket.c:2463 [inline]
 __sys_sendmsg+0x195/0x230 net/socket.c:2492
 __do_sys_sendmsg net/socket.c:2501 [inline]
 __se_sys_sendmsg net/socket.c:2499 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1:
 bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511
 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
 worker_thread+0x616/0xa70 kernel/workqueue.c:2445
 kthread+0x2c7/0x2e0 kernel/kthread.c:327
 ret_from_fork+0x1f/0x30

value changed: 0x00000001 -> 0x00000064

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: bond1 bond_alb_monitor

Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-12-14 10:57:09 +01:00

170 lines
6.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
*/
#ifndef _NET_BOND_ALB_H
#define _NET_BOND_ALB_H
#include <linux/if_ether.h>
struct bonding;
struct slave;
#define BOND_ALB_INFO(bond) ((bond)->alb_info)
#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info)
#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */
#define BOND_TLB_REBALANCE_INTERVAL 10 /* In seconds, periodic re-balancing.
* Used for division - never set
* to zero !!!
*/
#define BOND_ALB_DEFAULT_LP_INTERVAL 1
#define BOND_ALB_LP_INTERVAL(bond) (bond->params.lp_interval) /* In seconds, periodic send of
* learning packets to the switch
*/
#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \
* ALB_TIMER_TICKS_PER_SEC)
#define BOND_ALB_LP_TICKS(bond) (BOND_ALB_LP_INTERVAL(bond) \
* ALB_TIMER_TICKS_PER_SEC)
#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table.
* Note that this value MUST NOT be smaller
* because the key hash table is BYTE wide !
*/
#define TLB_NULL_INDEX 0xffffffff
/* rlb defs */
#define RLB_HASH_TABLE_SIZE 256
#define RLB_NULL_INDEX 0xffffffff
#define RLB_UPDATE_DELAY (2*ALB_TIMER_TICKS_PER_SEC) /* 2 seconds */
#define RLB_ARP_BURST_SIZE 2
#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb
* rebalance interval (5 min).
*/
/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
* promiscuous after failover
*/
#define RLB_PROMISC_TIMEOUT (10*ALB_TIMER_TICKS_PER_SEC)
struct tlb_client_info {
struct slave *tx_slave; /* A pointer to slave used for transmiting
* packets to a Client that the Hash function
* gave this entry index.
*/
u32 tx_bytes; /* Each Client accumulates the BytesTx that
* were transmitted to it, and after each
* CallBack the LoadHistory is divided
* by the balance interval
*/
u32 load_history; /* This field contains the amount of Bytes
* that were transmitted to this client by
* the server on the previous balance
* interval in Bps.
*/
u32 next; /* The next Hash table entry index, assigned
* to use the same adapter for transmit.
*/
u32 prev; /* The previous Hash table entry index,
* assigned to use the same
*/
};
/* -------------------------------------------------------------------------
* struct rlb_client_info contains all info related to a specific rx client
* connection. This is the Clients Hash Table entry struct.
* Note that this is not a proper hash table; if a new client's IP address
* hash collides with an existing client entry, the old entry is replaced.
*
* There is a linked list (linked by the used_next and used_prev members)
* linking all the used entries of the hash table. This allows updating
* all the clients without walking over all the unused elements of the table.
*
* There are also linked lists of entries with identical hash(ip_src). These
* allow cleaning up the table from ip_src<->mac_src associations that have
* become outdated and would cause sending out invalid ARP updates to the
* network. These are linked by the (src_next and src_prev members).
* -------------------------------------------------------------------------
*/
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
/* list of used hash table entries, starting at rx_hashtbl_used_head */
u32 used_next;
u32 used_prev;
/* ip_src based hashing */
u32 src_next; /* next entry with same hash(ip_src) */
u32 src_prev; /* prev entry with same hash(ip_src) */
u32 src_first; /* first entry with hash(ip_src) == this entry's index */
u8 assigned; /* checking whether this entry is assigned */
u8 ntt; /* flag - need to transmit client info */
struct slave *slave; /* the slave assigned to this client */
unsigned short vlan_id; /* VLAN tag associated with IP address */
};
struct tlb_slave_info {
u32 head; /* Index to the head of the bi-directional clients
* hash table entries list. The entries in the list
* are the entries that were assigned to use this
* slave for transmit.
*/
u32 load; /* Each slave sums the loadHistory of all clients
* assigned to it
*/
};
struct alb_bond_info {
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
u32 unbalanced_load;
atomic_t tx_rebalance_counter;
int lp_counter;
/* -------- rlb parameters -------- */
int rlb_enabled;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
u32 rx_hashtbl_used_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
struct slave *rx_slave;/* last slave to xmit from */
u8 primary_is_promisc; /* boolean */
u32 rlb_promisc_timeout_counter;/* counts primary
* promiscuity time
*/
u32 rlb_update_delay_counter;
u32 rlb_update_retry_counter;/* counter of retries
* of client update
*/
u8 rlb_rebalance; /* flag - indicates that the
* rx traffic should be
* rebalanced
*/
};
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
void bond_alb_deinitialize(struct bonding *bond);
int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb);
struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
struct sk_buff *skb);
void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
#endif /* _NET_BOND_ALB_H */