From 2b5fdc0f5caa505afe34d608e2eefadadf2ee67a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 25 Apr 2023 13:56:35 +0100 Subject: [PATCH 01/69] rxrpc: Fix potential data race in rxrpc_wait_to_be_connected() Inside the loop in rxrpc_wait_to_be_connected() it checks call->error to see if it should exit the loop without first checking the call state. This is probably safe as if call->error is set, the call is dead anyway, but we should probably wait for the call state to have been set to completion first, lest it cause surprise on the way out. Fix this by only accessing call->error if the call is complete. We don't actually need to access the error inside the loop as we'll do that after. This caused the following report: BUG: KCSAN: data-race in rxrpc_send_data / rxrpc_set_call_completion write to 0xffff888159cf3c50 of 4 bytes by task 25673 on cpu 1: rxrpc_set_call_completion+0x71/0x1c0 net/rxrpc/call_state.c:22 rxrpc_send_data_packet+0xba9/0x1650 net/rxrpc/output.c:479 rxrpc_transmit_one+0x1e/0x130 net/rxrpc/output.c:714 rxrpc_decant_prepared_tx net/rxrpc/call_event.c:326 [inline] rxrpc_transmit_some_data+0x496/0x600 net/rxrpc/call_event.c:350 rxrpc_input_call_event+0x564/0x1220 net/rxrpc/call_event.c:464 rxrpc_io_thread+0x307/0x1d80 net/rxrpc/io_thread.c:461 kthread+0x1ac/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 read to 0xffff888159cf3c50 of 4 bytes by task 25672 on cpu 0: rxrpc_send_data+0x29e/0x1950 net/rxrpc/sendmsg.c:296 rxrpc_do_sendmsg+0xb7a/0xc20 net/rxrpc/sendmsg.c:726 rxrpc_sendmsg+0x413/0x520 net/rxrpc/af_rxrpc.c:565 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0x375/0x4c0 net/socket.c:2501 ___sys_sendmsg net/socket.c:2555 [inline] __sys_sendmmsg+0x263/0x500 net/socket.c:2641 __do_sys_sendmmsg net/socket.c:2670 [inline] __se_sys_sendmmsg net/socket.c:2667 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000 -> 0xffffffea Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Reported-by: syzbot+ebc945fdb4acd72cba78@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/000000000000e7c6d205fa10a3cd@google.com/ Signed-off-by: David Howells cc: Marc Dionne cc: Dmitry Vyukov cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/508133.1682427395@warthog.procyon.org.uk Signed-off-by: Paolo Abeni --- net/rxrpc/sendmsg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index da49fcf1c456..6caa47d352ed 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -50,15 +50,11 @@ static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) _enter("%d", call->debug_id); if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) - return call->error; + goto no_wait; add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { - ret = call->error; - if (ret < 0) - break; - switch (call->interruptibility) { case RXRPC_INTERRUPTIBLE: case RXRPC_PREINTERRUPTIBLE: @@ -69,10 +65,9 @@ static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) set_current_state(TASK_UNINTERRUPTIBLE); break; } - if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = call->error; + + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) break; - } if ((call->interruptibility == RXRPC_INTERRUPTIBLE || call->interruptibility == RXRPC_PREINTERRUPTIBLE) && signal_pending(current)) { @@ -85,6 +80,7 @@ static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); +no_wait: if (ret == 0 && rxrpc_call_is_complete(call)) ret = call->error; From 32eff6bacec2cb574677c15378169a9fa30043ef Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 25 Apr 2023 16:06:04 +0200 Subject: [PATCH 02/69] net/sched: flower: Fix wrong handle assignment during filter change Commit 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") moved filter handle initialization but an assignment of the handle to fnew->handle is done regardless of fold value. This is wrong because if fold != NULL (so fold->handle == handle) no new handle is allocated and passed handle is assigned to fnew->handle. Then if any subsequent action in fl_change() fails then the handle value is removed from IDR that is incorrect as we will have still valid old filter instance with handle that is not present in IDR. Fix this issue by moving the assignment so it is done only when passed fold == NULL. Prior the patch: [root@machine tc-testing]# ./tdc.py -d enp1s0f0np0 -e 14be Test 14be: Concurrently replace same range of 100k flower filters from 10 tc instances exit: 123 exit: 0 RTNETLINK answers: Invalid argument We have an error talking to the kernel Command failed tmp/replace_6:1885 All test results: 1..1 not ok 1 14be - Concurrently replace same range of 100k flower filters from 10 tc instances Command exited with 123, expected 0 RTNETLINK answers: Invalid argument We have an error talking to the kernel Command failed tmp/replace_6:1885 After the patch: [root@machine tc-testing]# ./tdc.py -d enp1s0f0np0 -e 14be Test 14be: Concurrently replace same range of 100k flower filters from 10 tc instances All test results: 1..1 ok 1 14be - Concurrently replace same range of 100k flower filters from 10 tc instances Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230425140604.169881-1-ivecera@redhat.com Signed-off-by: Paolo Abeni --- net/sched/cls_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cc49256d5318..6ab6aadc07b8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2231,8 +2231,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(fnew); goto errout_tb; } + fnew->handle = handle; } - fnew->handle = handle; err = tcf_exts_init_ex(&fnew->exts, net, TCA_FLOWER_ACT, 0, tp, handle, !tc_skip_hw(fnew->flags)); From 1b483d9f5805c7e3d628d4995e97f4311fcb82eb Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 25 Apr 2023 11:47:25 -0300 Subject: [PATCH 03/69] net/sched: act_pedit: free pedit keys on bail from offset check Ido Schimmel reports a memleak on a syzkaller instance: BUG: memory leak unreferenced object 0xffff88803d45e400 (size 1024): comm "syz-executor292", pid 563, jiffies 4295025223 (age 51.781s) hex dump (first 32 bytes): 28 bd 70 00 fb db df 25 02 00 14 1f ff 02 00 02 (.p....%........ 00 32 00 00 1f 00 00 00 ac 14 14 3e 08 00 07 00 .2.........>.... backtrace: [] kmemleak_alloc_recursive include/linux/kmemleak.h:42 [inline] [] slab_post_alloc_hook mm/slab.h:772 [inline] [] slab_alloc_node mm/slub.c:3452 [inline] [] __kmem_cache_alloc_node+0x25c/0x320 mm/slub.c:3491 [] __do_kmalloc_node mm/slab_common.c:966 [inline] [] __kmalloc+0x59/0x1a0 mm/slab_common.c:980 [] kmalloc include/linux/slab.h:584 [inline] [] tcf_pedit_init+0x793/0x1ae0 net/sched/act_pedit.c:245 [] tcf_action_init_1+0x453/0x6e0 net/sched/act_api.c:1394 [] tcf_action_init+0x5a8/0x950 net/sched/act_api.c:1459 [] tcf_action_add+0x118/0x4e0 net/sched/act_api.c:1985 [] tc_ctl_action+0x377/0x490 net/sched/act_api.c:2044 [] rtnetlink_rcv_msg+0x46d/0xd70 net/core/rtnetlink.c:6395 [] netlink_rcv_skb+0x185/0x490 net/netlink/af_netlink.c:2575 [] rtnetlink_rcv+0x26/0x30 net/core/rtnetlink.c:6413 [] netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] [] netlink_unicast+0x5be/0x8a0 net/netlink/af_netlink.c:1365 [] netlink_sendmsg+0x9af/0xed0 net/netlink/af_netlink.c:1942 [] sock_sendmsg_nosec net/socket.c:724 [inline] [] sock_sendmsg net/socket.c:747 [inline] [] ____sys_sendmsg+0x3ef/0xaa0 net/socket.c:2503 [] ___sys_sendmsg+0x122/0x1c0 net/socket.c:2557 [] __sys_sendmsg+0x11f/0x200 net/socket.c:2586 [] __do_sys_sendmsg net/socket.c:2595 [inline] [] __se_sys_sendmsg net/socket.c:2593 [inline] [] __x64_sys_sendmsg+0x80/0xc0 net/socket.c:2593 The recently added static offset check missed a free to the key buffer when bailing out on error. Fixes: e1201bc781c2 ("net/sched: act_pedit: check static offsets a priori") Reported-by: Ido Schimmel Signed-off-by: Pedro Tammela Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20230425144725.669262-1-pctammela@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/act_pedit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fb93d4c1faca..fc945c7e4123 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -258,7 +258,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!offmask && cur % 4) { NL_SET_ERR_MSG_MOD(extack, "Offsets must be on 32bit boundaries"); ret = -EINVAL; - goto put_chain; + goto out_free_keys; } /* sanitize the shift value for any later use */ @@ -291,6 +291,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return ret; +out_free_keys: + kfree(nparms->tcfp_keys); put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); From c23ae5091a8b3e50fe755257df020907e7c029bb Mon Sep 17 00:00:00 2001 From: John Hickey Date: Tue, 25 Apr 2023 10:03:08 -0700 Subject: [PATCH 04/69] ixgbe: Fix panic during XDP_TX with > 64 CPUs Commit 4fe815850bdc ("ixgbe: let the xdpdrv work with more than 64 cpus") adds support to allow XDP programs to run on systems with more than 64 CPUs by locking the XDP TX rings and indexing them using cpu % 64 (IXGBE_MAX_XDP_QS). Upon trying this out patch on a system with more than 64 cores, the kernel paniced with an array-index-out-of-bounds at the return in ixgbe_determine_xdp_ring in ixgbe.h, which means ixgbe_determine_xdp_q_idx was just returning the cpu instead of cpu % IXGBE_MAX_XDP_QS. An example splat: ========================================================================== UBSAN: array-index-out-of-bounds in /var/lib/dkms/ixgbe/5.18.6+focal-1/build/src/ixgbe.h:1147:26 index 65 is out of range for type 'ixgbe_ring *[64]' ========================================================================== BUG: kernel NULL pointer dereference, address: 0000000000000058 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 65 PID: 408 Comm: ksoftirqd/65 Tainted: G IOE 5.15.0-48-generic #54~20.04.1-Ubuntu Hardware name: Dell Inc. PowerEdge R640/0W23H8, BIOS 2.5.4 01/13/2020 RIP: 0010:ixgbe_xmit_xdp_ring+0x1b/0x1c0 [ixgbe] Code: 3b 52 d4 cf e9 42 f2 ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 55 b9 00 00 00 00 48 89 e5 41 57 41 56 41 55 41 54 53 48 83 ec 08 <44> 0f b7 47 58 0f b7 47 5a 0f b7 57 54 44 0f b7 76 08 66 41 39 c0 RSP: 0018:ffffbc3fcd88fcb0 EFLAGS: 00010282 RAX: ffff92a253260980 RBX: ffffbc3fe68b00a0 RCX: 0000000000000000 RDX: ffff928b5f659000 RSI: ffff928b5f659000 RDI: 0000000000000000 RBP: ffffbc3fcd88fce0 R08: ffff92b9dfc20580 R09: 0000000000000001 R10: 3d3d3d3d3d3d3d3d R11: 3d3d3d3d3d3d3d3d R12: 0000000000000000 R13: ffff928b2f0fa8c0 R14: ffff928b9be20050 R15: 000000000000003c FS: 0000000000000000(0000) GS:ffff92b9dfc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000058 CR3: 000000011dd6a002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ixgbe_poll+0x103e/0x1280 [ixgbe] ? sched_clock_cpu+0x12/0xe0 __napi_poll+0x30/0x160 net_rx_action+0x11c/0x270 __do_softirq+0xda/0x2ee run_ksoftirqd+0x2f/0x50 smpboot_thread_fn+0xb7/0x150 ? sort_range+0x30/0x30 kthread+0x127/0x150 ? set_kthread_struct+0x50/0x50 ret_from_fork+0x1f/0x30 I think this is how it happens: Upon loading the first XDP program on a system with more than 64 CPUs, ixgbe_xdp_locking_key is incremented in ixgbe_xdp_setup. However, immediately after this, the rings are reconfigured by ixgbe_setup_tc. ixgbe_setup_tc calls ixgbe_clear_interrupt_scheme which calls ixgbe_free_q_vectors which calls ixgbe_free_q_vector in a loop. ixgbe_free_q_vector decrements ixgbe_xdp_locking_key once per call if it is non-zero. Commenting out the decrement in ixgbe_free_q_vector stopped my system from panicing. I suspect to make the original patch work, I would need to load an XDP program and then replace it in order to get ixgbe_xdp_locking_key back above 0 since ixgbe_setup_tc is only called when transitioning between XDP and non-XDP ring configurations, while ixgbe_xdp_locking_key is incremented every time ixgbe_xdp_setup is called. Also, ixgbe_setup_tc can be called via ethtool --set-channels, so this becomes another path to decrement ixgbe_xdp_locking_key to 0 on systems with more than 64 CPUs. Since ixgbe_xdp_locking_key only protects the XDP_TX path and is tied to the number of CPUs present, there is no reason to disable it upon unloading an XDP program. To avoid confusion, I have moved enabling ixgbe_xdp_locking_key into ixgbe_sw_init, which is part of the probe path. Fixes: 4fe815850bdc ("ixgbe: let the xdpdrv work with more than 64 cpus") Signed-off-by: John Hickey Reviewed-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230425170308.2522429-1-anthony.l.nguyen@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f8156fe4b1dc..0ee943db3dc9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1035,9 +1035,6 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) adapter->q_vector[v_idx] = NULL; __netif_napi_del(&q_vector->napi); - if (static_key_enabled(&ixgbe_xdp_locking_key)) - static_branch_dec(&ixgbe_xdp_locking_key); - /* * after a call to __netif_napi_del() napi may still be used and * ixgbe_get_stats64() might access the rings on this vector, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e961ef4bbf4d..5d83c887a3fc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6487,6 +6487,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, set_bit(0, adapter->fwd_bitmask); set_bit(__IXGBE_DOWN, &adapter->state); + /* enable locking for XDP_TX if we have more CPUs than queues */ + if (nr_cpu_ids > IXGBE_MAX_XDP_QS) + static_branch_enable(&ixgbe_xdp_locking_key); + return 0; } @@ -10270,8 +10274,6 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) */ if (nr_cpu_ids > IXGBE_MAX_XDP_QS * 2) return -ENOMEM; - else if (nr_cpu_ids > IXGBE_MAX_XDP_QS) - static_branch_inc(&ixgbe_xdp_locking_key); old_prog = xchg(&adapter->xdp_prog, prog); need_reset = (!!prog != !!old_prog); From c222b292a3568754828ffd30338d2909b14ed160 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 26 Apr 2023 11:55:20 +0530 Subject: [PATCH 05/69] octeonxt2-af: mcs: Fix per port bypass config For each lmac port, MCS has two MCS_TOP_SLAVE_CHANNEL_CONFIGX registers. For CN10KB both register need to be configured for the port level mcs bypass to work. This patch also sets bitmap of flowid/secy entry reserved for default bypass so that these entries can be shown in debugfs. Fixes: bd69476e86fc ("octeontx2-af: cn10k: mcs: Install a default TCAM for normal traffic") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 11 ++++++++++- .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index f68a6a0e3aa4..492baa0b594c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -494,6 +494,9 @@ int mcs_install_flowid_bypass_entry(struct mcs *mcs) /* Flow entry */ flow_id = mcs->hw->tcam_entries - MCS_RSRC_RSVD_CNT; + __set_bit(flow_id, mcs->rx.flow_ids.bmap); + __set_bit(flow_id, mcs->tx.flow_ids.bmap); + for (reg_id = 0; reg_id < 4; reg_id++) { reg = MCSX_CPM_RX_SLAVE_FLOWID_TCAM_MASKX(reg_id, flow_id); mcs_reg_write(mcs, reg, GENMASK_ULL(63, 0)); @@ -504,6 +507,8 @@ int mcs_install_flowid_bypass_entry(struct mcs *mcs) } /* secy */ secy_id = mcs->hw->secy_entries - MCS_RSRC_RSVD_CNT; + __set_bit(secy_id, mcs->rx.secy.bmap); + __set_bit(secy_id, mcs->tx.secy.bmap); /* Set validate frames to NULL and enable control port */ plcy = 0x7ull; @@ -528,6 +533,7 @@ int mcs_install_flowid_bypass_entry(struct mcs *mcs) /* Enable Flowid entry */ mcs_ena_dis_flowid_entry(mcs, flow_id, MCS_RX, true); mcs_ena_dis_flowid_entry(mcs, flow_id, MCS_TX, true); + return 0; } @@ -1325,8 +1331,11 @@ void mcs_reset_port(struct mcs *mcs, u8 port_id, u8 reset) void mcs_set_lmac_mode(struct mcs *mcs, int lmac_id, u8 mode) { u64 reg; + int id = lmac_id * 2; - reg = MCSX_MCS_TOP_SLAVE_CHANNEL_CFG(lmac_id * 2); + reg = MCSX_MCS_TOP_SLAVE_CHANNEL_CFG(id); + mcs_reg_write(mcs, reg, (u64)mode); + reg = MCSX_MCS_TOP_SLAVE_CHANNEL_CFG((id + 1)); mcs_reg_write(mcs, reg, (u64)mode); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 26cfa501f1a1..9533b1d92960 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -497,8 +497,9 @@ static int rvu_dbg_mcs_rx_secy_stats_display(struct seq_file *filp, void *unused stats.octet_validated_cnt); seq_printf(filp, "secy%d: Pkts on disable port: %lld\n", secy_id, stats.pkt_port_disabled_cnt); - seq_printf(filp, "secy%d: Octets validated: %lld\n", secy_id, stats.pkt_badtag_cnt); - seq_printf(filp, "secy%d: Octets validated: %lld\n", secy_id, stats.pkt_nosa_cnt); + seq_printf(filp, "secy%d: Pkts with badtag: %lld\n", secy_id, stats.pkt_badtag_cnt); + seq_printf(filp, "secy%d: Pkts with no SA(sectag.tci.c=0): %lld\n", secy_id, + stats.pkt_nosa_cnt); seq_printf(filp, "secy%d: Pkts with nosaerror: %lld\n", secy_id, stats.pkt_nosaerror_cnt); seq_printf(filp, "secy%d: Tagged ctrl pkts: %lld\n", secy_id, From b51612198603fce33d6cf57b4864e3018a1cd9b8 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:21 +0530 Subject: [PATCH 06/69] octeontx2-af: mcs: Write TCAM_DATA and TCAM_MASK registers at once As per hardware errata on CN10KB, all the four TCAM_DATA and TCAM_MASK registers has to be written at once otherwise write to individual registers will fail. Hence write to all TCAM_DATA registers and then to all TCAM_MASK registers. Fixes: cfc14181d497 ("octeontx2-af: cn10k: mcs: Manage the MCS block hardware resources") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 492baa0b594c..148417d633a5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -473,6 +473,8 @@ void mcs_flowid_entry_write(struct mcs *mcs, u64 *data, u64 *mask, int flow_id, for (reg_id = 0; reg_id < 4; reg_id++) { reg = MCSX_CPM_RX_SLAVE_FLOWID_TCAM_DATAX(reg_id, flow_id); mcs_reg_write(mcs, reg, data[reg_id]); + } + for (reg_id = 0; reg_id < 4; reg_id++) { reg = MCSX_CPM_RX_SLAVE_FLOWID_TCAM_MASKX(reg_id, flow_id); mcs_reg_write(mcs, reg, mask[reg_id]); } @@ -480,6 +482,8 @@ void mcs_flowid_entry_write(struct mcs *mcs, u64 *data, u64 *mask, int flow_id, for (reg_id = 0; reg_id < 4; reg_id++) { reg = MCSX_CPM_TX_SLAVE_FLOWID_TCAM_DATAX(reg_id, flow_id); mcs_reg_write(mcs, reg, data[reg_id]); + } + for (reg_id = 0; reg_id < 4; reg_id++) { reg = MCSX_CPM_TX_SLAVE_FLOWID_TCAM_MASKX(reg_id, flow_id); mcs_reg_write(mcs, reg, mask[reg_id]); } From 65cdc2b637a5749c7dec0ce14fe2c48f1f91f671 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 26 Apr 2023 11:55:22 +0530 Subject: [PATCH 07/69] octeontx2-af: mcs: Config parser to skip 8B header When ptp timestamp is enabled in RPM, RPM will append 8B timestamp header for all RX traffic. MCS need to skip these 8 bytes header while parsing the packet header, so that correct tcam key is created for lookup. This patch fixes the mcs parser configuration to skip this 8B header for ptp packets. Fixes: ca7f49ff8846 ("octeontx2-af: cn10k: Introduce driver for macsec block.") Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/mcs_reg.h | 1 + .../marvell/octeontx2/af/mcs_rvu_if.c | 37 +++++++++++++++++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 + 4 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h index c95a8b8f5eaf..7427e3b1490f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h @@ -97,6 +97,7 @@ #define MCSX_PEX_TX_SLAVE_VLAN_CFGX(a) (0x46f8ull + (a) * 0x8ull) #define MCSX_PEX_TX_SLAVE_CUSTOM_TAG_REL_MODE_SEL(a) (0x788ull + (a) * 0x8ull) #define MCSX_PEX_TX_SLAVE_PORT_CONFIG(a) (0x4738ull + (a) * 0x8ull) +#define MCSX_PEX_RX_SLAVE_PORT_CFGX(a) (0x3b98ull + (a) * 0x8ull) #define MCSX_PEX_RX_SLAVE_RULE_ETYPE_CFGX(a) ({ \ u64 offset; \ \ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index eb25e458266c..dfd23580e3b8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -11,6 +11,7 @@ #include "mcs.h" #include "rvu.h" +#include "mcs_reg.h" #include "lmac_common.h" #define M(_name, _id, _fn_name, _req_type, _rsp_type) \ @@ -32,6 +33,42 @@ static struct _req_type __maybe_unused \ MBOX_UP_MCS_MESSAGES #undef M +void rvu_mcs_ptp_cfg(struct rvu *rvu, u8 rpm_id, u8 lmac_id, bool ena) +{ + struct mcs *mcs; + u64 cfg; + u8 port; + + if (!rvu->mcs_blk_cnt) + return; + + /* When ptp is enabled, RPM appends 8B header for all + * RX packets. MCS PEX need to configure to skip 8B + * during packet parsing. + */ + + /* CNF10K-B */ + if (rvu->mcs_blk_cnt > 1) { + mcs = mcs_get_pdata(rpm_id); + cfg = mcs_reg_read(mcs, MCSX_PEX_RX_SLAVE_PEX_CONFIGURATION); + if (ena) + cfg |= BIT_ULL(lmac_id); + else + cfg &= ~BIT_ULL(lmac_id); + mcs_reg_write(mcs, MCSX_PEX_RX_SLAVE_PEX_CONFIGURATION, cfg); + return; + } + /* CN10KB */ + mcs = mcs_get_pdata(0); + port = (rpm_id * rvu->hw->lmac_per_cgx) + lmac_id; + cfg = mcs_reg_read(mcs, MCSX_PEX_RX_SLAVE_PORT_CFGX(port)); + if (ena) + cfg |= BIT_ULL(0); + else + cfg &= ~BIT_ULL(0); + mcs_reg_write(mcs, MCSX_PEX_RX_SLAVE_PORT_CFGX(port), cfg); +} + int rvu_mbox_handler_mcs_set_lmac_mode(struct rvu *rvu, struct mcs_set_lmac_mode *req, struct msg_rsp *rsp) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index ef721caeac49..d655bf04a483 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -920,6 +920,7 @@ int rvu_get_hwvf(struct rvu *rvu, int pcifunc); /* CN10K MCS */ int rvu_mcs_init(struct rvu *rvu); int rvu_mcs_flr_handler(struct rvu *rvu, u16 pcifunc); +void rvu_mcs_ptp_cfg(struct rvu *rvu, u8 rpm_id, u8 lmac_id, bool ena); void rvu_mcs_exit(struct rvu *rvu); #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 438b212fb54a..83b342fa8d75 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -773,6 +773,8 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) /* This flag is required to clean up CGX conf if app gets killed */ pfvf->hw_rx_tstamp_en = enable; + /* Inform MCS about 8B RX header */ + rvu_mcs_ptp_cfg(rvu, cgx_id, lmac_id, enable); return 0; } From b8aebeaaf9ffb1e99c642eb3751e28981f9be475 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 26 Apr 2023 11:55:23 +0530 Subject: [PATCH 08/69] octeontx2-af: mcs: Fix MCS block interrupt On CN10KB, MCS IP vector number, BBE and PAB interrupt mask got changed to support more block level interrupts. To address this changes, this patch fixes the bbe and pab interrupt handlers. Fixes: 6c635f78c474 ("octeontx2-af: cn10k: mcs: Handle MCS block interrupts") Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/mcs.c | 95 ++++++++----------- .../net/ethernet/marvell/octeontx2/af/mcs.h | 26 +++-- .../marvell/octeontx2/af/mcs_cnf10kb.c | 63 ++++++++++++ .../ethernet/marvell/octeontx2/af/mcs_reg.h | 5 +- 4 files changed, 119 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 148417d633a5..c43f19dfbd74 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -936,60 +936,42 @@ static void mcs_tx_misc_intr_handler(struct mcs *mcs, u64 intr) mcs_add_intr_wq_entry(mcs, &event); } -static void mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir) +void cn10kb_mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, + enum mcs_direction dir) { - struct mcs_intr_event event = { 0 }; - int i; + u64 val, reg; + int lmac; - if (!(intr & MCS_BBE_INT_MASK)) + if (!(intr & 0x6ULL)) return; - event.mcs_id = mcs->mcs_id; - event.pcifunc = mcs->pf_map[0]; + if (intr & BIT_ULL(1)) + reg = (dir == MCS_RX) ? MCSX_BBE_RX_SLAVE_DFIFO_OVERFLOW_0 : + MCSX_BBE_TX_SLAVE_DFIFO_OVERFLOW_0; + else + reg = (dir == MCS_RX) ? MCSX_BBE_RX_SLAVE_PLFIFO_OVERFLOW_0 : + MCSX_BBE_TX_SLAVE_PLFIFO_OVERFLOW_0; + val = mcs_reg_read(mcs, reg); - for (i = 0; i < MCS_MAX_BBE_INT; i++) { - if (!(intr & BIT_ULL(i))) + /* policy/data over flow occurred */ + for (lmac = 0; lmac < mcs->hw->lmac_cnt; lmac++) { + if (!(val & BIT_ULL(lmac))) continue; - - /* Lower nibble denotes data fifo overflow interrupts and - * upper nibble indicates policy fifo overflow interrupts. - */ - if (intr & 0xFULL) - event.intr_mask = (dir == MCS_RX) ? - MCS_BBE_RX_DFIFO_OVERFLOW_INT : - MCS_BBE_TX_DFIFO_OVERFLOW_INT; - else - event.intr_mask = (dir == MCS_RX) ? - MCS_BBE_RX_PLFIFO_OVERFLOW_INT : - MCS_BBE_TX_PLFIFO_OVERFLOW_INT; - - /* Notify the lmac_id info which ran into BBE fatal error */ - event.lmac_id = i & 0x3ULL; - mcs_add_intr_wq_entry(mcs, &event); + dev_warn(mcs->dev, "BEE:Policy or data overflow occurred on lmac:%d\n", lmac); } } -static void mcs_pab_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir) +void cn10kb_mcs_pab_intr_handler(struct mcs *mcs, u64 intr, + enum mcs_direction dir) { - struct mcs_intr_event event = { 0 }; - int i; + int lmac; - if (!(intr & MCS_PAB_INT_MASK)) + if (!(intr & 0xFFFFFULL)) return; - event.mcs_id = mcs->mcs_id; - event.pcifunc = mcs->pf_map[0]; - - for (i = 0; i < MCS_MAX_PAB_INT; i++) { - if (!(intr & BIT_ULL(i))) - continue; - - event.intr_mask = (dir == MCS_RX) ? MCS_PAB_RX_CHAN_OVERFLOW_INT : - MCS_PAB_TX_CHAN_OVERFLOW_INT; - - /* Notify the lmac_id info which ran into PAB fatal error */ - event.lmac_id = i; - mcs_add_intr_wq_entry(mcs, &event); + for (lmac = 0; lmac < mcs->hw->lmac_cnt; lmac++) { + if (intr & BIT_ULL(lmac)) + dev_warn(mcs->dev, "PAB: overflow occurred on lmac:%d\n", lmac); } } @@ -998,9 +980,8 @@ static irqreturn_t mcs_ip_intr_handler(int irq, void *mcs_irq) struct mcs *mcs = (struct mcs *)mcs_irq; u64 intr, cpm_intr, bbe_intr, pab_intr; - /* Disable and clear the interrupt */ + /* Disable the interrupt */ mcs_reg_write(mcs, MCSX_IP_INT_ENA_W1C, BIT_ULL(0)); - mcs_reg_write(mcs, MCSX_IP_INT, BIT_ULL(0)); /* Check which block has interrupt*/ intr = mcs_reg_read(mcs, MCSX_TOP_SLAVE_INT_SUM); @@ -1047,7 +1028,7 @@ static irqreturn_t mcs_ip_intr_handler(int irq, void *mcs_irq) /* BBE RX */ if (intr & MCS_BBE_RX_INT_ENA) { bbe_intr = mcs_reg_read(mcs, MCSX_BBE_RX_SLAVE_BBE_INT); - mcs_bbe_intr_handler(mcs, bbe_intr, MCS_RX); + mcs->mcs_ops->mcs_bbe_intr_handler(mcs, bbe_intr, MCS_RX); /* Clear the interrupt */ mcs_reg_write(mcs, MCSX_BBE_RX_SLAVE_BBE_INT_INTR_RW, 0); @@ -1057,7 +1038,7 @@ static irqreturn_t mcs_ip_intr_handler(int irq, void *mcs_irq) /* BBE TX */ if (intr & MCS_BBE_TX_INT_ENA) { bbe_intr = mcs_reg_read(mcs, MCSX_BBE_TX_SLAVE_BBE_INT); - mcs_bbe_intr_handler(mcs, bbe_intr, MCS_TX); + mcs->mcs_ops->mcs_bbe_intr_handler(mcs, bbe_intr, MCS_TX); /* Clear the interrupt */ mcs_reg_write(mcs, MCSX_BBE_TX_SLAVE_BBE_INT_INTR_RW, 0); @@ -1067,7 +1048,7 @@ static irqreturn_t mcs_ip_intr_handler(int irq, void *mcs_irq) /* PAB RX */ if (intr & MCS_PAB_RX_INT_ENA) { pab_intr = mcs_reg_read(mcs, MCSX_PAB_RX_SLAVE_PAB_INT); - mcs_pab_intr_handler(mcs, pab_intr, MCS_RX); + mcs->mcs_ops->mcs_pab_intr_handler(mcs, pab_intr, MCS_RX); /* Clear the interrupt */ mcs_reg_write(mcs, MCSX_PAB_RX_SLAVE_PAB_INT_INTR_RW, 0); @@ -1077,14 +1058,15 @@ static irqreturn_t mcs_ip_intr_handler(int irq, void *mcs_irq) /* PAB TX */ if (intr & MCS_PAB_TX_INT_ENA) { pab_intr = mcs_reg_read(mcs, MCSX_PAB_TX_SLAVE_PAB_INT); - mcs_pab_intr_handler(mcs, pab_intr, MCS_TX); + mcs->mcs_ops->mcs_pab_intr_handler(mcs, pab_intr, MCS_TX); /* Clear the interrupt */ mcs_reg_write(mcs, MCSX_PAB_TX_SLAVE_PAB_INT_INTR_RW, 0); mcs_reg_write(mcs, MCSX_PAB_TX_SLAVE_PAB_INT, pab_intr); } - /* Enable the interrupt */ + /* Clear and enable the interrupt */ + mcs_reg_write(mcs, MCSX_IP_INT, BIT_ULL(0)); mcs_reg_write(mcs, MCSX_IP_INT_ENA_W1S, BIT_ULL(0)); return IRQ_HANDLED; @@ -1166,7 +1148,7 @@ static int mcs_register_interrupts(struct mcs *mcs) return ret; } - ret = request_irq(pci_irq_vector(mcs->pdev, MCS_INT_VEC_IP), + ret = request_irq(pci_irq_vector(mcs->pdev, mcs->hw->ip_vec), mcs_ip_intr_handler, 0, "MCS_IP", mcs); if (ret) { dev_err(mcs->dev, "MCS IP irq registration failed\n"); @@ -1185,11 +1167,11 @@ static int mcs_register_interrupts(struct mcs *mcs) mcs_reg_write(mcs, MCSX_CPM_TX_SLAVE_TX_INT_ENB, 0x7ULL); mcs_reg_write(mcs, MCSX_CPM_RX_SLAVE_RX_INT_ENB, 0x7FULL); - mcs_reg_write(mcs, MCSX_BBE_RX_SLAVE_BBE_INT_ENB, 0xff); - mcs_reg_write(mcs, MCSX_BBE_TX_SLAVE_BBE_INT_ENB, 0xff); + mcs_reg_write(mcs, MCSX_BBE_RX_SLAVE_BBE_INT_ENB, 0xFFULL); + mcs_reg_write(mcs, MCSX_BBE_TX_SLAVE_BBE_INT_ENB, 0xFFULL); - mcs_reg_write(mcs, MCSX_PAB_RX_SLAVE_PAB_INT_ENB, 0xff); - mcs_reg_write(mcs, MCSX_PAB_TX_SLAVE_PAB_INT_ENB, 0xff); + mcs_reg_write(mcs, MCSX_PAB_RX_SLAVE_PAB_INT_ENB, 0xFFFFFULL); + mcs_reg_write(mcs, MCSX_PAB_TX_SLAVE_PAB_INT_ENB, 0xFFFFFULL); mcs->tx_sa_active = alloc_mem(mcs, mcs->hw->sc_entries); if (!mcs->tx_sa_active) { @@ -1200,7 +1182,7 @@ static int mcs_register_interrupts(struct mcs *mcs) return ret; free_irq: - free_irq(pci_irq_vector(mcs->pdev, MCS_INT_VEC_IP), mcs); + free_irq(pci_irq_vector(mcs->pdev, mcs->hw->ip_vec), mcs); exit: pci_free_irq_vectors(mcs->pdev); mcs->num_vec = 0; @@ -1497,6 +1479,7 @@ void cn10kb_mcs_set_hw_capabilities(struct mcs *mcs) hw->lmac_cnt = 20; /* lmacs/ports per mcs block */ hw->mcs_x2p_intf = 5; /* x2p clabration intf */ hw->mcs_blks = 1; /* MCS blocks */ + hw->ip_vec = MCS_CN10KB_INT_VEC_IP; /* IP vector */ } static struct mcs_ops cn10kb_mcs_ops = { @@ -1505,6 +1488,8 @@ static struct mcs_ops cn10kb_mcs_ops = { .mcs_tx_sa_mem_map_write = cn10kb_mcs_tx_sa_mem_map_write, .mcs_rx_sa_mem_map_write = cn10kb_mcs_rx_sa_mem_map_write, .mcs_flowid_secy_map = cn10kb_mcs_flowid_secy_map, + .mcs_bbe_intr_handler = cn10kb_mcs_bbe_intr_handler, + .mcs_pab_intr_handler = cn10kb_mcs_pab_intr_handler, }; static int mcs_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -1605,7 +1590,7 @@ static void mcs_remove(struct pci_dev *pdev) /* Set MCS to external bypass */ mcs_set_external_bypass(mcs, true); - free_irq(pci_irq_vector(pdev, MCS_INT_VEC_IP), mcs); + free_irq(pci_irq_vector(pdev, mcs->hw->ip_vec), mcs); pci_free_irq_vectors(pdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h index 64dc2b80e15d..0f89dcb76465 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h @@ -43,24 +43,15 @@ /* Reserved resources for default bypass entry */ #define MCS_RSRC_RSVD_CNT 1 -/* MCS Interrupt Vector Enumeration */ -enum mcs_int_vec_e { - MCS_INT_VEC_MIL_RX_GBL = 0x0, - MCS_INT_VEC_MIL_RX_LMACX = 0x1, - MCS_INT_VEC_MIL_TX_LMACX = 0x5, - MCS_INT_VEC_HIL_RX_GBL = 0x9, - MCS_INT_VEC_HIL_RX_LMACX = 0xa, - MCS_INT_VEC_HIL_TX_GBL = 0xe, - MCS_INT_VEC_HIL_TX_LMACX = 0xf, - MCS_INT_VEC_IP = 0x13, - MCS_INT_VEC_CNT = 0x14, -}; +/* MCS Interrupt Vector */ +#define MCS_CNF10KB_INT_VEC_IP 0x13 +#define MCS_CN10KB_INT_VEC_IP 0x53 #define MCS_MAX_BBE_INT 8ULL #define MCS_BBE_INT_MASK 0xFFULL -#define MCS_MAX_PAB_INT 4ULL -#define MCS_PAB_INT_MASK 0xFULL +#define MCS_MAX_PAB_INT 8ULL +#define MCS_PAB_INT_MASK 0xFULL #define MCS_BBE_RX_INT_ENA BIT_ULL(0) #define MCS_BBE_TX_INT_ENA BIT_ULL(1) @@ -137,6 +128,7 @@ struct hwinfo { u8 lmac_cnt; u8 mcs_blks; unsigned long lmac_bmap; /* bitmap of enabled mcs lmac */ + u16 ip_vec; }; struct mcs { @@ -165,6 +157,8 @@ struct mcs_ops { void (*mcs_tx_sa_mem_map_write)(struct mcs *mcs, struct mcs_tx_sc_sa_map *map); void (*mcs_rx_sa_mem_map_write)(struct mcs *mcs, struct mcs_rx_sc_sa_map *map); void (*mcs_flowid_secy_map)(struct mcs *mcs, struct secy_mem_map *map, int dir); + void (*mcs_bbe_intr_handler)(struct mcs *mcs, u64 intr, enum mcs_direction dir); + void (*mcs_pab_intr_handler)(struct mcs *mcs, u64 intr, enum mcs_direction dir); }; extern struct pci_driver mcs_driver; @@ -219,6 +213,8 @@ void cn10kb_mcs_tx_sa_mem_map_write(struct mcs *mcs, struct mcs_tx_sc_sa_map *ma void cn10kb_mcs_flowid_secy_map(struct mcs *mcs, struct secy_mem_map *map, int dir); void cn10kb_mcs_rx_sa_mem_map_write(struct mcs *mcs, struct mcs_rx_sc_sa_map *map); void cn10kb_mcs_parser_cfg(struct mcs *mcs); +void cn10kb_mcs_pab_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir); +void cn10kb_mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir); /* CNF10K-B APIs */ struct mcs_ops *cnf10kb_get_mac_ops(void); @@ -229,6 +225,8 @@ void cnf10kb_mcs_rx_sa_mem_map_write(struct mcs *mcs, struct mcs_rx_sc_sa_map *m void cnf10kb_mcs_parser_cfg(struct mcs *mcs); void cnf10kb_mcs_tx_pn_thresh_reached_handler(struct mcs *mcs); void cnf10kb_mcs_tx_pn_wrapped_handler(struct mcs *mcs); +void cnf10kb_mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir); +void cnf10kb_mcs_pab_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction dir); /* Stats APIs */ void mcs_get_sc_stats(struct mcs *mcs, struct mcs_sc_stats *stats, int id, int dir); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_cnf10kb.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_cnf10kb.c index 7b6205414428..9f9b904ab2cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_cnf10kb.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_cnf10kb.c @@ -13,6 +13,8 @@ static struct mcs_ops cnf10kb_mcs_ops = { .mcs_tx_sa_mem_map_write = cnf10kb_mcs_tx_sa_mem_map_write, .mcs_rx_sa_mem_map_write = cnf10kb_mcs_rx_sa_mem_map_write, .mcs_flowid_secy_map = cnf10kb_mcs_flowid_secy_map, + .mcs_bbe_intr_handler = cnf10kb_mcs_bbe_intr_handler, + .mcs_pab_intr_handler = cnf10kb_mcs_pab_intr_handler, }; struct mcs_ops *cnf10kb_get_mac_ops(void) @@ -31,6 +33,7 @@ void cnf10kb_mcs_set_hw_capabilities(struct mcs *mcs) hw->lmac_cnt = 4; /* lmacs/ports per mcs block */ hw->mcs_x2p_intf = 1; /* x2p clabration intf */ hw->mcs_blks = 7; /* MCS blocks */ + hw->ip_vec = MCS_CNF10KB_INT_VEC_IP; /* IP vector */ } void cnf10kb_mcs_parser_cfg(struct mcs *mcs) @@ -212,3 +215,63 @@ void cnf10kb_mcs_tx_pn_wrapped_handler(struct mcs *mcs) mcs_add_intr_wq_entry(mcs, &event); } } + +void cnf10kb_mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, + enum mcs_direction dir) +{ + struct mcs_intr_event event = { 0 }; + int i; + + if (!(intr & MCS_BBE_INT_MASK)) + return; + + event.mcs_id = mcs->mcs_id; + event.pcifunc = mcs->pf_map[0]; + + for (i = 0; i < MCS_MAX_BBE_INT; i++) { + if (!(intr & BIT_ULL(i))) + continue; + + /* Lower nibble denotes data fifo overflow interrupts and + * upper nibble indicates policy fifo overflow interrupts. + */ + if (intr & 0xFULL) + event.intr_mask = (dir == MCS_RX) ? + MCS_BBE_RX_DFIFO_OVERFLOW_INT : + MCS_BBE_TX_DFIFO_OVERFLOW_INT; + else + event.intr_mask = (dir == MCS_RX) ? + MCS_BBE_RX_PLFIFO_OVERFLOW_INT : + MCS_BBE_TX_PLFIFO_OVERFLOW_INT; + + /* Notify the lmac_id info which ran into BBE fatal error */ + event.lmac_id = i & 0x3ULL; + mcs_add_intr_wq_entry(mcs, &event); + } +} + +void cnf10kb_mcs_pab_intr_handler(struct mcs *mcs, u64 intr, + enum mcs_direction dir) +{ + struct mcs_intr_event event = { 0 }; + int i; + + if (!(intr & MCS_PAB_INT_MASK)) + return; + + event.mcs_id = mcs->mcs_id; + event.pcifunc = mcs->pf_map[0]; + + for (i = 0; i < MCS_MAX_PAB_INT; i++) { + if (!(intr & BIT_ULL(i))) + continue; + + event.intr_mask = (dir == MCS_RX) ? + MCS_PAB_RX_CHAN_OVERFLOW_INT : + MCS_PAB_TX_CHAN_OVERFLOW_INT; + + /* Notify the lmac_id info which ran into PAB fatal error */ + event.lmac_id = i; + mcs_add_intr_wq_entry(mcs, &event); + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h index 7427e3b1490f..f3ab01fc363c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h @@ -276,7 +276,10 @@ #define MCSX_BBE_RX_SLAVE_CAL_ENTRY 0x180ull #define MCSX_BBE_RX_SLAVE_CAL_LEN 0x188ull #define MCSX_PAB_RX_SLAVE_FIFO_SKID_CFGX(a) (0x290ull + (a) * 0x40ull) - +#define MCSX_BBE_RX_SLAVE_DFIFO_OVERFLOW_0 0xe20 +#define MCSX_BBE_TX_SLAVE_DFIFO_OVERFLOW_0 0x1298 +#define MCSX_BBE_RX_SLAVE_PLFIFO_OVERFLOW_0 0xe40 +#define MCSX_BBE_TX_SLAVE_PLFIFO_OVERFLOW_0 0x12b8 #define MCSX_BBE_RX_SLAVE_BBE_INT ({ \ u64 offset; \ \ From 699af748c61574125d269db260dabbe20436d74e Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:24 +0530 Subject: [PATCH 09/69] octeontx2-pf: mcs: Fix NULL pointer dereferences When system is rebooted after creating macsec interface below NULL pointer dereference crashes occurred. This patch fixes those crashes by using correct order of teardown [ 3324.406942] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 3324.415726] Mem abort info: [ 3324.418510] ESR = 0x96000006 [ 3324.421557] EC = 0x25: DABT (current EL), IL = 32 bits [ 3324.426865] SET = 0, FnV = 0 [ 3324.429913] EA = 0, S1PTW = 0 [ 3324.433047] Data abort info: [ 3324.435921] ISV = 0, ISS = 0x00000006 [ 3324.439748] CM = 0, WnR = 0 .... [ 3324.575915] Call trace: [ 3324.578353] cn10k_mdo_del_secy+0x24/0x180 [ 3324.582440] macsec_common_dellink+0xec/0x120 [ 3324.586788] macsec_notify+0x17c/0x1c0 [ 3324.590529] raw_notifier_call_chain+0x50/0x70 [ 3324.594965] call_netdevice_notifiers_info+0x34/0x7c [ 3324.599921] rollback_registered_many+0x354/0x5bc [ 3324.604616] unregister_netdevice_queue+0x88/0x10c [ 3324.609399] unregister_netdev+0x20/0x30 [ 3324.613313] otx2_remove+0x8c/0x310 [ 3324.616794] pci_device_shutdown+0x30/0x70 [ 3324.620882] device_shutdown+0x11c/0x204 [ 966.664930] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 966.673712] Mem abort info: [ 966.676497] ESR = 0x96000006 [ 966.679543] EC = 0x25: DABT (current EL), IL = 32 bits [ 966.684848] SET = 0, FnV = 0 [ 966.687895] EA = 0, S1PTW = 0 [ 966.691028] Data abort info: [ 966.693900] ISV = 0, ISS = 0x00000006 [ 966.697729] CM = 0, WnR = 0 [ 966.833467] Call trace: [ 966.835904] cn10k_mdo_stop+0x20/0xa0 [ 966.839557] macsec_dev_stop+0xe8/0x11c [ 966.843384] __dev_close_many+0xbc/0x140 [ 966.847298] dev_close_many+0x84/0x120 [ 966.851039] rollback_registered_many+0x114/0x5bc [ 966.855735] unregister_netdevice_many.part.0+0x14/0xa0 [ 966.860952] unregister_netdevice_many+0x18/0x24 [ 966.865560] macsec_notify+0x1ac/0x1c0 [ 966.869303] raw_notifier_call_chain+0x50/0x70 [ 966.873738] call_netdevice_notifiers_info+0x34/0x7c [ 966.878694] rollback_registered_many+0x354/0x5bc [ 966.883390] unregister_netdevice_queue+0x88/0x10c [ 966.888173] unregister_netdev+0x20/0x30 [ 966.892090] otx2_remove+0x8c/0x310 [ 966.895571] pci_device_shutdown+0x30/0x70 [ 966.899660] device_shutdown+0x11c/0x204 [ 966.903574] __do_sys_reboot+0x208/0x290 [ 966.907487] __arm64_sys_reboot+0x20/0x30 [ 966.911489] el0_svc_handler+0x80/0x1c0 [ 966.915316] el0_svc+0x8/0x180 [ 966.918362] Code: f9400000 f9400a64 91220014 f94b3403 (f9400060) [ 966.924448] ---[ end trace 341778e799c3d8d7 ]--- Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 179433d0a54a..a75c944cc739 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3073,8 +3073,6 @@ static void otx2_remove(struct pci_dev *pdev) otx2_config_pause_frm(pf); } - cn10k_mcs_free(pf); - #ifdef CONFIG_DCB /* Disable PFC config */ if (pf->pfc_en) { @@ -3088,6 +3086,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_unregister_dl(pf); unregister_netdev(netdev); + cn10k_mcs_free(pf); otx2_sriov_disable(pf->pdev); otx2_sriov_vfcfg_cleanup(pf); if (pf->otx2_wq) From 57d00d4364f314485092667d2a48718985515deb Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:25 +0530 Subject: [PATCH 10/69] octeontx2-pf: mcs: Match macsec ethertype along with DMAC On CN10KB silicon a single hardware macsec block is present and offloads macsec operations for all the ethernet LMACs. TCAM match with macsec ethertype 0x88e5 alone at RX side is not sufficient to distinguish all the macsec interfaces created on top of netdevs. Hence append the DMAC of the macsec interface too. Otherwise the first created macsec interface only receives all the macsec traffic. Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 9ec5f38d38a8..f699209978fe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -9,6 +9,7 @@ #include #include "otx2_common.h" +#define MCS_TCAM0_MAC_DA_MASK GENMASK_ULL(47, 0) #define MCS_TCAM0_MAC_SA_MASK GENMASK_ULL(63, 48) #define MCS_TCAM1_MAC_SA_MASK GENMASK_ULL(31, 0) #define MCS_TCAM1_ETYPE_MASK GENMASK_ULL(47, 32) @@ -237,8 +238,10 @@ static int cn10k_mcs_write_rx_flowid(struct otx2_nic *pfvf, struct cn10k_mcs_rxsc *rxsc, u8 hw_secy_id) { struct macsec_rx_sc *sw_rx_sc = rxsc->sw_rxsc; + struct macsec_secy *secy = rxsc->sw_secy; struct mcs_flowid_entry_write_req *req; struct mbox *mbox = &pfvf->mbox; + u64 mac_da; int ret; mutex_lock(&mbox->lock); @@ -249,11 +252,16 @@ static int cn10k_mcs_write_rx_flowid(struct otx2_nic *pfvf, goto fail; } + mac_da = ether_addr_to_u64(secy->netdev->dev_addr); + + req->data[0] = FIELD_PREP(MCS_TCAM0_MAC_DA_MASK, mac_da); + req->mask[0] = ~0ULL; + req->mask[0] = ~MCS_TCAM0_MAC_DA_MASK; + req->data[1] = FIELD_PREP(MCS_TCAM1_ETYPE_MASK, ETH_P_MACSEC); req->mask[1] = ~0ULL; req->mask[1] &= ~MCS_TCAM1_ETYPE_MASK; - req->mask[0] = ~0ULL; req->mask[2] = ~0ULL; req->mask[3] = ~0ULL; From 815debbbf7b52026462c37eea3be70d6377a7a9a Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:26 +0530 Subject: [PATCH 11/69] octeontx2-pf: mcs: Clear stats before freeing resource When freeing MCS hardware resources like SecY, SC and SA the corresponding stats needs to be cleared. Otherwise previous stats are shown in newly created macsec interfaces. Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index f699209978fe..13faca9add9f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -150,11 +150,20 @@ static void cn10k_mcs_free_rsrc(struct otx2_nic *pfvf, enum mcs_direction dir, enum mcs_rsrc_type type, u16 hw_rsrc_id, bool all) { + struct mcs_clear_stats *clear_req; struct mbox *mbox = &pfvf->mbox; struct mcs_free_rsrc_req *req; mutex_lock(&mbox->lock); + clear_req = otx2_mbox_alloc_msg_mcs_clear_stats(mbox); + if (!clear_req) + goto fail; + + clear_req->id = hw_rsrc_id; + clear_req->type = type; + clear_req->dir = dir; + req = otx2_mbox_alloc_msg_mcs_free_resources(mbox); if (!req) goto fail; From 9bdfe61054fb2b989eb58df20bf99c0cf67e3038 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:27 +0530 Subject: [PATCH 12/69] octeontx2-pf: mcs: Fix shared counters logic Macsec stats like InPktsLate and InPktsDelayed share same counter in hardware. If SecY replay_protect is true then counter represents InPktsLate otherwise InPktsDelayed. This mode change was tracked based on protect_frames instead of replay_protect mistakenly. Similarly InPktsUnchecked and InPktsOk share same counter and mode change was tracked based on validate_check instead of validate_disabled. This patch fixes those problems. Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 14 +++++++------- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 13faca9add9f..3ad8d7ef20be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -1014,7 +1014,7 @@ static void cn10k_mcs_sync_stats(struct otx2_nic *pfvf, struct macsec_secy *secy /* Check if sync is really needed */ if (secy->validate_frames == txsc->last_validate_frames && - secy->protect_frames == txsc->last_protect_frames) + secy->replay_protect == txsc->last_replay_protect) return; cn10k_mcs_secy_stats(pfvf, txsc->hw_secy_id_rx, &rx_rsp, MCS_RX, true); @@ -1036,19 +1036,19 @@ static void cn10k_mcs_sync_stats(struct otx2_nic *pfvf, struct macsec_secy *secy rxsc->stats.InPktsInvalid += sc_rsp.pkt_invalid_cnt; rxsc->stats.InPktsNotValid += sc_rsp.pkt_notvalid_cnt; - if (txsc->last_protect_frames) + if (txsc->last_replay_protect) rxsc->stats.InPktsLate += sc_rsp.pkt_late_cnt; else rxsc->stats.InPktsDelayed += sc_rsp.pkt_late_cnt; - if (txsc->last_validate_frames == MACSEC_VALIDATE_CHECK) + if (txsc->last_validate_frames == MACSEC_VALIDATE_DISABLED) rxsc->stats.InPktsUnchecked += sc_rsp.pkt_unchecked_cnt; else rxsc->stats.InPktsOK += sc_rsp.pkt_unchecked_cnt; } txsc->last_validate_frames = secy->validate_frames; - txsc->last_protect_frames = secy->protect_frames; + txsc->last_replay_protect = secy->replay_protect; } static int cn10k_mdo_open(struct macsec_context *ctx) @@ -1117,7 +1117,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) txsc->sw_secy = secy; txsc->encoding_sa = secy->tx_sc.encoding_sa; txsc->last_validate_frames = secy->validate_frames; - txsc->last_protect_frames = secy->protect_frames; + txsc->last_replay_protect = secy->replay_protect; list_add(&txsc->entry, &cfg->txsc_list); @@ -1538,12 +1538,12 @@ static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx) rxsc->stats.InPktsInvalid += rsp.pkt_invalid_cnt; rxsc->stats.InPktsNotValid += rsp.pkt_notvalid_cnt; - if (secy->protect_frames) + if (secy->replay_protect) rxsc->stats.InPktsLate += rsp.pkt_late_cnt; else rxsc->stats.InPktsDelayed += rsp.pkt_late_cnt; - if (secy->validate_frames == MACSEC_VALIDATE_CHECK) + if (secy->validate_frames == MACSEC_VALIDATE_DISABLED) rxsc->stats.InPktsUnchecked += rsp.pkt_unchecked_cnt; else rxsc->stats.InPktsOK += rsp.pkt_unchecked_cnt; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 3d22cc6a2804..f42b2b65bfd7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -389,7 +389,7 @@ struct cn10k_mcs_txsc { struct cn10k_txsc_stats stats; struct list_head entry; enum macsec_validation_type last_validate_frames; - bool last_protect_frames; + bool last_replay_protect; u16 hw_secy_id_tx; u16 hw_secy_id_rx; u16 hw_flow_id; From 3c99bace4ad08ad0264285ba8ad73117560992c2 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 26 Apr 2023 11:55:28 +0530 Subject: [PATCH 13/69] octeontx2-pf: mcs: Do not reset PN while updating secy After creating SecYs, SCs and SAs a SecY can be modified to change attributes like validation mode, protect frames mode etc. During this SecY update, packet number is reset to initial user given value by mistake. Hence do not reset PN when updating SecY parameters. Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Reviewed-by: Leon Romanovsky Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 3ad8d7ef20be..a487a98eac88 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -1134,6 +1134,7 @@ static int cn10k_mdo_upd_secy(struct macsec_context *ctx) struct macsec_secy *secy = ctx->secy; struct macsec_tx_sa *sw_tx_sa; struct cn10k_mcs_txsc *txsc; + bool active; u8 sa_num; int err; @@ -1141,15 +1142,19 @@ static int cn10k_mdo_upd_secy(struct macsec_context *ctx) if (!txsc) return -ENOENT; - txsc->encoding_sa = secy->tx_sc.encoding_sa; - - sa_num = txsc->encoding_sa; - sw_tx_sa = rcu_dereference_bh(secy->tx_sc.sa[sa_num]); + /* Encoding SA got changed */ + if (txsc->encoding_sa != secy->tx_sc.encoding_sa) { + txsc->encoding_sa = secy->tx_sc.encoding_sa; + sa_num = txsc->encoding_sa; + sw_tx_sa = rcu_dereference_bh(secy->tx_sc.sa[sa_num]); + active = sw_tx_sa ? sw_tx_sa->active : false; + cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num, active); + } if (netif_running(secy->netdev)) { cn10k_mcs_sync_stats(pfvf, secy, txsc); - err = cn10k_mcs_secy_tx_cfg(pfvf, secy, txsc, sw_tx_sa, sa_num); + err = cn10k_mcs_secy_tx_cfg(pfvf, secy, txsc, NULL, 0); if (err) return err; } From 6f75cd166a5a3c0bc50441faa8b8304f60522fdd Mon Sep 17 00:00:00 2001 From: Cosmo Chou Date: Wed, 26 Apr 2023 16:13:50 +0800 Subject: [PATCH 14/69] net/ncsi: clear Tx enable mode when handling a Config required AEN ncsi_channel_is_tx() determines whether a given channel should be used for Tx or not. However, when reconfiguring the channel by handling a Configuration Required AEN, there is a misjudgment that the channel Tx has already been enabled, which results in the Enable Channel Network Tx command not being sent. Clear the channel Tx enable flag before reconfiguring the channel to avoid the misjudgment. Fixes: 8d951a75d022 ("net/ncsi: Configure multi-package, multi-channel modes with failover") Signed-off-by: Cosmo Chou Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index b635c194f0a8..62fb1031763d 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -165,6 +165,7 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); + nc->modes[NCSI_MODE_TX_ENABLE].enable = 0; return ncsi_process_next_channel(ndp); } From 7e692df3933628d974acb9f5b334d2b3e885e2a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Apr 2023 04:32:31 +0000 Subject: [PATCH 15/69] tcp: fix skb_copy_ubufs() vs BIG TCP David Ahern reported crashes in skb_copy_ubufs() caused by TCP tx zerocopy using hugepages, and skb length bigger than ~68 KB. skb_copy_ubufs() assumed it could copy all payload using up to MAX_SKB_FRAGS order-0 pages. This assumption broke when BIG TCP was able to put up to 512 KB per skb. We did not hit this bug at Google because we use CONFIG_MAX_SKB_FRAGS=45 and limit gso_max_size to 180000. A solution is to use higher order pages if needed. v2: add missing __GFP_COMP, or we leak memory. Fixes: 7c4e983c4f3c ("net: allow gso_max_size to exceed 65536") Reported-by: David Ahern Link: https://lore.kernel.org/netdev/c70000f6-baa4-4a05-46d0-4b3e0dc1ccc8@gmail.com/T/ Signed-off-by: Eric Dumazet Cc: Xin Long Cc: Willem de Bruijn Cc: Coco Li Signed-off-by: David S. Miller --- net/core/skbuff.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2112146092bf..26a586007d8b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1758,7 +1758,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; - int i, new_frags; + int i, order, psize, new_frags; u32 d_off; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) @@ -1767,9 +1767,17 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) if (!num_frags) goto release; - new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* We might have to allocate high order pages, so compute what minimum + * page order is needed. + */ + order = 0; + while ((PAGE_SIZE << order) * MAX_SKB_FRAGS < __skb_pagelen(skb)) + order++; + psize = (PAGE_SIZE << order); + + new_frags = (__skb_pagelen(skb) + psize - 1) >> (PAGE_SHIFT + order); for (i = 0; i < new_frags; i++) { - page = alloc_page(gfp_mask); + page = alloc_pages(gfp_mask | __GFP_COMP, order); if (!page) { while (head) { struct page *next = (struct page *)page_private(head); @@ -1796,11 +1804,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) vaddr = kmap_atomic(p); while (done < p_len) { - if (d_off == PAGE_SIZE) { + if (d_off == psize) { d_off = 0; page = (struct page *)page_private(page); } - copy = min_t(u32, PAGE_SIZE - d_off, p_len - done); + copy = min_t(u32, psize - d_off, p_len - done); memcpy(page_address(page) + d_off, vaddr + p_off + done, copy); done += copy; @@ -1816,7 +1824,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { - __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE); + __skb_fill_page_desc(skb, i, head, 0, psize); head = (struct page *)page_private(head); } __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); From e0807c430239d62d8dd7a25552e469aad8c3dd28 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 27 Apr 2023 15:39:48 +0200 Subject: [PATCH 16/69] mISDN: Use list_count_nodes() count_list_member() really looks the same as list_count_nodes(), so use the latter instead of hand writing it. The first one return an int and the other a size_t, but that should be fine. It is really unlikely that we get so many parties in a conference. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/isdn/mISDN/dsp_cmx.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c index 6d2088fbaf69..357b87592eb4 100644 --- a/drivers/isdn/mISDN/dsp_cmx.c +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -141,17 +141,6 @@ /*#define CMX_DELAY_DEBUG * gives rx-buffer delay overview */ /*#define CMX_TX_DEBUG * massive read/write on tx-buffer with content */ -static inline int -count_list_member(struct list_head *head) -{ - int cnt = 0; - struct list_head *m; - - list_for_each(m, head) - cnt++; - return cnt; -} - /* * debug cmx memory structure */ @@ -1672,7 +1661,7 @@ dsp_cmx_send(void *arg) mustmix = 0; members = 0; if (conf) { - members = count_list_member(&conf->mlist); + members = list_count_nodes(&conf->mlist); #ifdef CMX_CONF_DEBUG if (conf->software && members > 1) #else @@ -1695,7 +1684,7 @@ dsp_cmx_send(void *arg) /* loop all members that require conference mixing */ list_for_each_entry(conf, &conf_ilist, list) { /* count members and check hardware */ - members = count_list_member(&conf->mlist); + members = list_count_nodes(&conf->mlist); #ifdef CMX_CONF_DEBUG if (conf->software && members > 1) { #else From da94a7781fc3c92e7df7832bc2746f4d39bc624e Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 26 Apr 2023 14:31:11 +0200 Subject: [PATCH 17/69] net/sched: cls_api: remove block_cb from driver_list before freeing Error handler of tcf_block_bind() frees the whole bo->cb_list on error. However, by that time the flow_block_cb instances are already in the driver list because driver ndo_setup_tc() callback is called before that up the call chain in tcf_block_offload_cmd(). This leaves dangling pointers to freed objects in the list and causes use-after-free[0]. Fix it by also removing flow_block_cb instances from driver_list before deallocating them. [0]: [ 279.868433] ================================================================== [ 279.869964] BUG: KASAN: slab-use-after-free in flow_block_cb_setup_simple+0x631/0x7c0 [ 279.871527] Read of size 8 at addr ffff888147e2bf20 by task tc/2963 [ 279.873151] CPU: 6 PID: 2963 Comm: tc Not tainted 6.3.0-rc6+ #4 [ 279.874273] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 279.876295] Call Trace: [ 279.876882] [ 279.877413] dump_stack_lvl+0x33/0x50 [ 279.878198] print_report+0xc2/0x610 [ 279.878987] ? flow_block_cb_setup_simple+0x631/0x7c0 [ 279.879994] kasan_report+0xae/0xe0 [ 279.880750] ? flow_block_cb_setup_simple+0x631/0x7c0 [ 279.881744] ? mlx5e_tc_reoffload_flows_work+0x240/0x240 [mlx5_core] [ 279.883047] flow_block_cb_setup_simple+0x631/0x7c0 [ 279.884027] tcf_block_offload_cmd.isra.0+0x189/0x2d0 [ 279.885037] ? tcf_block_setup+0x6b0/0x6b0 [ 279.885901] ? mutex_lock+0x7d/0xd0 [ 279.886669] ? __mutex_unlock_slowpath.constprop.0+0x2d0/0x2d0 [ 279.887844] ? ingress_init+0x1c0/0x1c0 [sch_ingress] [ 279.888846] tcf_block_get_ext+0x61c/0x1200 [ 279.889711] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.890682] ? clsact_init+0x2b0/0x2b0 [sch_ingress] [ 279.891701] qdisc_create+0x401/0xea0 [ 279.892485] ? qdisc_tree_reduce_backlog+0x470/0x470 [ 279.893473] tc_modify_qdisc+0x6f7/0x16d0 [ 279.894344] ? tc_get_qdisc+0xac0/0xac0 [ 279.895213] ? mutex_lock+0x7d/0xd0 [ 279.896005] ? __mutex_lock_slowpath+0x10/0x10 [ 279.896910] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.897770] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 279.898672] ? __sys_sendmsg+0xb5/0x140 [ 279.899494] ? do_syscall_64+0x3d/0x90 [ 279.900302] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.901337] ? kasan_save_stack+0x2e/0x40 [ 279.902177] ? kasan_save_stack+0x1e/0x40 [ 279.903058] ? kasan_set_track+0x21/0x30 [ 279.903913] ? kasan_save_free_info+0x2a/0x40 [ 279.904836] ? ____kasan_slab_free+0x11a/0x1b0 [ 279.905741] ? kmem_cache_free+0x179/0x400 [ 279.906599] netlink_rcv_skb+0x12c/0x360 [ 279.907450] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 279.908360] ? netlink_ack+0x1550/0x1550 [ 279.909192] ? rhashtable_walk_peek+0x170/0x170 [ 279.910135] ? kmem_cache_alloc_node+0x1af/0x390 [ 279.911086] ? _copy_from_iter+0x3d6/0xc70 [ 279.912031] netlink_unicast+0x553/0x790 [ 279.912864] ? netlink_attachskb+0x6a0/0x6a0 [ 279.913763] ? netlink_recvmsg+0x416/0xb50 [ 279.914627] netlink_sendmsg+0x7a1/0xcb0 [ 279.915473] ? netlink_unicast+0x790/0x790 [ 279.916334] ? iovec_from_user.part.0+0x4d/0x220 [ 279.917293] ? netlink_unicast+0x790/0x790 [ 279.918159] sock_sendmsg+0xc5/0x190 [ 279.918938] ____sys_sendmsg+0x535/0x6b0 [ 279.919813] ? import_iovec+0x7/0x10 [ 279.920601] ? kernel_sendmsg+0x30/0x30 [ 279.921423] ? __copy_msghdr+0x3c0/0x3c0 [ 279.922254] ? import_iovec+0x7/0x10 [ 279.923041] ___sys_sendmsg+0xeb/0x170 [ 279.923854] ? copy_msghdr_from_user+0x110/0x110 [ 279.924797] ? ___sys_recvmsg+0xd9/0x130 [ 279.925630] ? __perf_event_task_sched_in+0x183/0x470 [ 279.926656] ? ___sys_sendmsg+0x170/0x170 [ 279.927529] ? ctx_sched_in+0x530/0x530 [ 279.928369] ? update_curr+0x283/0x4f0 [ 279.929185] ? perf_event_update_userpage+0x570/0x570 [ 279.930201] ? __fget_light+0x57/0x520 [ 279.931023] ? __switch_to+0x53d/0xe70 [ 279.931846] ? sockfd_lookup_light+0x1a/0x140 [ 279.932761] __sys_sendmsg+0xb5/0x140 [ 279.933560] ? __sys_sendmsg_sock+0x20/0x20 [ 279.934436] ? fpregs_assert_state_consistent+0x1d/0xa0 [ 279.935490] do_syscall_64+0x3d/0x90 [ 279.936300] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.937311] RIP: 0033:0x7f21c814f887 [ 279.938085] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 279.941448] RSP: 002b:00007fff11efd478 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 279.942964] RAX: ffffffffffffffda RBX: 0000000064401979 RCX: 00007f21c814f887 [ 279.944337] RDX: 0000000000000000 RSI: 00007fff11efd4e0 RDI: 0000000000000003 [ 279.945660] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 279.947003] R10: 00007f21c8008708 R11: 0000000000000246 R12: 0000000000000001 [ 279.948345] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [ 279.949690] [ 279.950706] Allocated by task 2960: [ 279.951471] kasan_save_stack+0x1e/0x40 [ 279.952338] kasan_set_track+0x21/0x30 [ 279.953165] __kasan_kmalloc+0x77/0x90 [ 279.954006] flow_block_cb_setup_simple+0x3dd/0x7c0 [ 279.955001] tcf_block_offload_cmd.isra.0+0x189/0x2d0 [ 279.956020] tcf_block_get_ext+0x61c/0x1200 [ 279.956881] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.957873] qdisc_create+0x401/0xea0 [ 279.958656] tc_modify_qdisc+0x6f7/0x16d0 [ 279.959506] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.960392] netlink_rcv_skb+0x12c/0x360 [ 279.961216] netlink_unicast+0x553/0x790 [ 279.962044] netlink_sendmsg+0x7a1/0xcb0 [ 279.962906] sock_sendmsg+0xc5/0x190 [ 279.963702] ____sys_sendmsg+0x535/0x6b0 [ 279.964534] ___sys_sendmsg+0xeb/0x170 [ 279.965343] __sys_sendmsg+0xb5/0x140 [ 279.966132] do_syscall_64+0x3d/0x90 [ 279.966908] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.968407] Freed by task 2960: [ 279.969114] kasan_save_stack+0x1e/0x40 [ 279.969929] kasan_set_track+0x21/0x30 [ 279.970729] kasan_save_free_info+0x2a/0x40 [ 279.971603] ____kasan_slab_free+0x11a/0x1b0 [ 279.972483] __kmem_cache_free+0x14d/0x280 [ 279.973337] tcf_block_setup+0x29d/0x6b0 [ 279.974173] tcf_block_offload_cmd.isra.0+0x226/0x2d0 [ 279.975186] tcf_block_get_ext+0x61c/0x1200 [ 279.976080] ingress_init+0x112/0x1c0 [sch_ingress] [ 279.977065] qdisc_create+0x401/0xea0 [ 279.977857] tc_modify_qdisc+0x6f7/0x16d0 [ 279.978695] rtnetlink_rcv_msg+0x5fe/0x9d0 [ 279.979562] netlink_rcv_skb+0x12c/0x360 [ 279.980388] netlink_unicast+0x553/0x790 [ 279.981214] netlink_sendmsg+0x7a1/0xcb0 [ 279.982043] sock_sendmsg+0xc5/0x190 [ 279.982827] ____sys_sendmsg+0x535/0x6b0 [ 279.983703] ___sys_sendmsg+0xeb/0x170 [ 279.984510] __sys_sendmsg+0xb5/0x140 [ 279.985298] do_syscall_64+0x3d/0x90 [ 279.986076] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 279.987532] The buggy address belongs to the object at ffff888147e2bf00 which belongs to the cache kmalloc-192 of size 192 [ 279.989747] The buggy address is located 32 bytes inside of freed 192-byte region [ffff888147e2bf00, ffff888147e2bfc0) [ 279.992367] The buggy address belongs to the physical page: [ 279.993430] page:00000000550f405c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x147e2a [ 279.995182] head:00000000550f405c order:1 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 279.996713] anon flags: 0x200000000010200(slab|head|node=0|zone=2) [ 279.997878] raw: 0200000000010200 ffff888100042a00 0000000000000000 dead000000000001 [ 279.999384] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [ 280.000894] page dumped because: kasan: bad access detected [ 280.002386] Memory state around the buggy address: [ 280.003338] ffff888147e2be00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.004781] ffff888147e2be80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 280.006224] >ffff888147e2bf00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.007700] ^ [ 280.008592] ffff888147e2bf80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 280.010035] ffff888147e2c000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 280.011564] ================================================================== Fixes: 59094b1e5094 ("net: sched: use flow block API") Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3c3629c9e7b6..2621550bfddc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1589,6 +1589,7 @@ static int tcf_block_bind(struct tcf_block *block, err_unroll: list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { + list_del(&block_cb->driver_list); if (i-- > 0) { list_del(&block_cb->list); tcf_block_playback_offloads(block, block_cb->cb, From c88f8d5cd95fd039cff95d682b8e71100c001df0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 26 Apr 2023 23:00:06 -0700 Subject: [PATCH 18/69] sit: update dev->needed_headroom in ipip6_tunnel_bind_dev() When a tunnel device is bound with the underlying device, its dev->needed_headroom needs to be updated properly. IPv4 tunnels already do the same in ip_tunnel_bind_dev(). Otherwise we may not have enough header room for skb, especially after commit b17f709a2401 ("gue: TX support for using remote checksum offload option"). Fixes: 32b8a8e59c9c ("sit: add IPv4 over IPv4 support") Reported-by: Palash Oswal Link: https://lore.kernel.org/netdev/CAGyP=7fDcSPKu6nttbGwt7RXzE3uyYxLjCSE97J64pRxJP8jPA@mail.gmail.com/ Cc: Kuniyuki Iwashima Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv6/sit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 063560e2cb1a..cc24cefdb85c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1095,12 +1095,13 @@ tx_err: static void ipip6_tunnel_bind_dev(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; + int hlen = LL_MAX_HEADER; const struct iphdr *iph; struct flowi4 fl4; - tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; if (iph->daddr) { @@ -1123,14 +1124,15 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev && !netif_is_l3_master(tdev)) { - int t_hlen = tunnel->hlen + sizeof(struct iphdr); int mtu; mtu = tdev->mtu - t_hlen; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; WRITE_ONCE(dev->mtu, mtu); + hlen = tdev->hard_header_len + tdev->needed_headroom; } + dev->needed_headroom = t_hlen + hlen; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, From 042334a8d424a1917e916e611a8dda7f8caf1491 Mon Sep 17 00:00:00 2001 From: wuych Date: Thu, 27 Apr 2023 18:25:31 +0800 Subject: [PATCH 19/69] atlantic:hw_atl2:hw_atl2_utils_fw: Remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: wuych Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index 58d426dda3ed..674683b54304 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -336,7 +336,7 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac) static void aq_a2_fill_a0_stats(struct aq_hw_s *self, struct statistics_s *stats) { - struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct hw_atl2_priv *priv = self->priv; struct aq_stats_s *cs = &self->curr_stats; struct aq_stats_s curr_stats = *cs; bool corrupted_stats = false; @@ -378,7 +378,7 @@ do { \ static void aq_a2_fill_b0_stats(struct aq_hw_s *self, struct statistics_s *stats) { - struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct hw_atl2_priv *priv = self->priv; struct aq_stats_s *cs = &self->curr_stats; struct aq_stats_s curr_stats = *cs; bool corrupted_stats = false; From 46ef24c60f8ee70662968ac55325297ed4624d61 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Thu, 27 Apr 2023 11:49:23 +0200 Subject: [PATCH 20/69] selftests: srv6: make srv6_end_dt46_l3vpn_test more robust On some distributions, the rp_filter is automatically set (=1) by default on a netdev basis (also on VRFs). In an SRv6 End.DT46 behavior, decapsulated IPv4 packets are routed using the table associated with the VRF bound to that tunnel. During lookup operations, the rp_filter can lead to packet loss when activated on the VRF. Therefore, we chose to make this selftest more robust by explicitly disabling the rp_filter during tests (as it is automatically set by some Linux distributions). Fixes: 03a0b567a03d ("selftests: seg6: add selftest for SRv6 End.DT46 Behavior") Reported-by: Hangbin Liu Signed-off-by: Andrea Mayer Tested-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- .../testing/selftests/net/srv6_end_dt46_l3vpn_test.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index aebaab8ce44c..441eededa031 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -292,6 +292,11 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad @@ -316,11 +321,6 @@ setup_hs() ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 - ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } From dc6456e938e938d64ffb6383a286b2ac9790a37f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 27 Apr 2023 11:21:59 +0200 Subject: [PATCH 21/69] net: ipv6: fix skb hash for some RST packets The skb hash comes from sk->sk_txhash when using TCP, except for some IPv6 RST packets. This is because in tcp_v6_send_reset when not in TIME_WAIT the hash is taken from sk->sk_hash, while it should come from sk->sk_txhash as those two hashes are not computed the same way. Packetdrill script to test the above, 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0 > (flowlabel 0x1) S 0:0(0) <...> // Wrong ack seq, trigger a rst. +0 < S. 0:0(0) ack 0 win 4000 // Check the flowlabel matches prior one from SYN. +0 > (flowlabel 0x1) R 0:0(0) <...> Fixes: 9258b8b1be2e ("ipv6: tcp: send consistent autoflowlabel in RST packets") Signed-off-by: Antoine Tenart Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 244cf86c4cbb..7132eb213a7a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1065,7 +1065,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (np->repflow) label = ip6_flowlabel(ipv6h); priority = sk->sk_priority; - txhash = sk->sk_hash; + txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); From 6686317855c6997671982d4489ccdd946f644957 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 26 Apr 2023 22:28:15 +0200 Subject: [PATCH 22/69] net: dsa: mv88e6xxx: add mv88e6321 rsvd2cpu Add rsvd2cpu capability for mv88e6321 model, to allow proper bpdu processing. Signed-off-by: Angelo Dureghello Fixes: 51c901a775621 ("net: dsa: mv88e6xxx: distinguish Global 2 Rsvd2CPU") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index dc263cea205f..64a2f2f83735 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5194,6 +5194,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, From 526f28bd0fbdc699cda31426928802650c1528e5 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 26 Apr 2023 15:19:40 +0000 Subject: [PATCH 23/69] net/sched: act_mirred: Add carrier check There are cases where the device is adminstratively UP, but operationally down. For example, we have a physical device (Nvidia ConnectX-6 Dx, 25Gbps) who's cable was pulled out, here is its ip link output: 5: ens2f1: mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000 link/ether b8:ce:f6:4b:68:35 brd ff:ff:ff:ff:ff:ff altname enp179s0f1np1 As you can see, it's administratively UP but operationally down. In this case, sending a packet to this port caused a nasty kernel hang (so nasty that we were unable to capture it). Aborting a transmit based on operational status (in addition to administrative status) fixes the issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira v1->v2: Add fixes tag v2->v3: Remove blank line between tags + add change log, suggested by Leon Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index ec43764e92e7..0a711c184c29 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -264,7 +264,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, goto out; } - if (unlikely(!(dev->flags & IFF_UP))) { + if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto out; From 8ceda6d5a1e5402fd852e6cc59a286ce3dc545ee Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 28 Apr 2023 16:53:29 +0800 Subject: [PATCH 24/69] r8152: fix flow control issue of RTL8156A The feature of flow control becomes abnormal, if the device sends a pause frame and the tx/rx is disabled before sending a release frame. It causes the lost of packets. Set PLA_RX_FIFO_FULL and PLA_RX_FIFO_EMPTY to zeros before disabling the tx/rx. And, toggle FC_PATCH_TASK before enabling tx/rx to reset the flow control patch and timer. Then, the hardware could clear the state and the flow control becomes normal after enabling tx/rx. Besides, remove inline for fc_pause_on_auto() and fc_pause_off_auto(). Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 56 ++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0fc4b959edc1..afd50e90d1fe 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5986,6 +5986,25 @@ static void rtl8153_disable(struct r8152 *tp) r8153_aldps_en(tp, true); } +static u32 fc_pause_on_auto(struct r8152 *tp) +{ + return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 6 * 1024); +} + +static u32 fc_pause_off_auto(struct r8152 *tp) +{ + return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 14 * 1024); +} + +static void r8156_fc_parameter(struct r8152 *tp) +{ + u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp); + u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp); + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); +} + static int rtl8156_enable(struct r8152 *tp) { u32 ocp_data; @@ -5994,6 +6013,7 @@ static int rtl8156_enable(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; + r8156_fc_parameter(tp); set_tx_qlen(tp); rtl_set_eee_plus(tp); r8153_set_rx_early_timeout(tp); @@ -6025,9 +6045,24 @@ static int rtl8156_enable(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data); } + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); + ocp_data &= ~FC_PATCH_TASK; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + usleep_range(1000, 2000); + ocp_data |= FC_PATCH_TASK; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + return rtl_enable(tp); } +static void rtl8156_disable(struct r8152 *tp) +{ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 0); + + rtl8153_disable(tp); +} + static int rtl8156b_enable(struct r8152 *tp) { u32 ocp_data; @@ -6429,25 +6464,6 @@ static void rtl8153c_up(struct r8152 *tp) r8153b_u1u2en(tp, true); } -static inline u32 fc_pause_on_auto(struct r8152 *tp) -{ - return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 6 * 1024); -} - -static inline u32 fc_pause_off_auto(struct r8152 *tp) -{ - return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 14 * 1024); -} - -static void r8156_fc_parameter(struct r8152 *tp) -{ - u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp); - u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp); - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16); -} - static void rtl8156_change_mtu(struct r8152 *tp) { u32 rx_max_size = mtu_to_size(tp->netdev->mtu); @@ -9340,7 +9356,7 @@ static int rtl_ops_init(struct r8152 *tp) case RTL_VER_10: ops->init = r8156_init; ops->enable = rtl8156_enable; - ops->disable = rtl8153_disable; + ops->disable = rtl8156_disable; ops->up = rtl8156_up; ops->down = rtl8156_down; ops->unload = rtl8153_unload; From 61b0ad6f58e2066e054c6d4839d67974d2861a7d Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 28 Apr 2023 16:53:30 +0800 Subject: [PATCH 25/69] r8152: fix the poor throughput for 2.5G devices Fix the poor throughput for 2.5G devices, when changing the speed from auto mode to force mode. This patch is used to notify the MAC when the mode is changed. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index afd50e90d1fe..58670a65b840 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -199,6 +199,7 @@ #define OCP_EEE_AR 0xa41a #define OCP_EEE_DATA 0xa41c #define OCP_PHY_STATUS 0xa420 +#define OCP_INTR_EN 0xa424 #define OCP_NCTL_CFG 0xa42c #define OCP_POWER_CFG 0xa430 #define OCP_EEE_CFG 0xa432 @@ -620,6 +621,9 @@ enum spd_duplex { #define PHY_STAT_LAN_ON 3 #define PHY_STAT_PWRDN 5 +/* OCP_INTR_EN */ +#define INTR_SPEED_FORCE BIT(3) + /* OCP_NCTL_CFG */ #define PGA_RETURN_EN BIT(1) @@ -7554,6 +7558,11 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) ((swap_a & 0x1f) << 8) | ((swap_a >> 8) & 0x1f)); } + + /* Notify the MAC when the speed is changed to force mode. */ + data = ocp_reg_read(tp, OCP_INTR_EN); + data |= INTR_SPEED_FORCE; + ocp_reg_write(tp, OCP_INTR_EN, data); break; default: break; @@ -7949,6 +7958,11 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) break; } + /* Notify the MAC when the speed is changed to force mode. */ + data = ocp_reg_read(tp, OCP_INTR_EN); + data |= INTR_SPEED_FORCE; + ocp_reg_write(tp, OCP_INTR_EN, data); + if (rtl_phy_patch_request(tp, true, true)) return; From cce8334f4aacd9936309a002d4a4de92a07cd2c2 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 28 Apr 2023 16:53:31 +0800 Subject: [PATCH 26/69] r8152: move setting r8153b_rx_agg_chg_indicate() Move setting r8153b_rx_agg_chg_indicate() for 2.5G devices. The r8153b_rx_agg_chg_indicate() has to be called after enabling tx/rx. Otherwise, the coalescing settings are useless. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 58670a65b840..755b0f72dd44 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3027,12 +3027,16 @@ static int rtl_enable(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data); switch (tp->version) { - case RTL_VER_08: - case RTL_VER_09: - case RTL_VER_14: - r8153b_rx_agg_chg_indicate(tp); + case RTL_VER_01: + case RTL_VER_02: + case RTL_VER_03: + case RTL_VER_04: + case RTL_VER_05: + case RTL_VER_06: + case RTL_VER_07: break; default: + r8153b_rx_agg_chg_indicate(tp); break; } @@ -3086,7 +3090,6 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) 640 / 8); ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EXTRA_AGGR_TMR, ocp_data); - r8153b_rx_agg_chg_indicate(tp); break; default: @@ -3120,7 +3123,6 @@ static void r8153_set_rx_early_size(struct r8152 *tp) case RTL_VER_15: ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data / 8); - r8153b_rx_agg_chg_indicate(tp); break; default: WARN_ON_ONCE(1); From 281900a923d4c50df109b52a22ae3cdac150159b Mon Sep 17 00:00:00 2001 From: Andy Moreton Date: Fri, 28 Apr 2023 12:33:33 +0100 Subject: [PATCH 27/69] sfc: Fix module EEPROM reporting for QSFP modules The sfc driver does not report QSFP module EEPROM contents correctly as only the first page is fetched from hardware. Commit 0e1a2a3e6e7d ("ethtool: Add SFF-8436 and SFF-8636 max EEPROM length definitions") added ETH_MODULE_SFF_8436_MAX_LEN for the overall size of the EEPROM info, so use that to report the full EEPROM contents. Fixes: 9b17010da57a ("sfc: Add ethtool -m support for QSFP modules") Signed-off-by: Andy Moreton Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port_common.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c index 899cc1671004..0ab14f3d01d4 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.c +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -972,12 +972,15 @@ static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) /* A QSFP+ NIC may actually have an SFP+ module attached. * The ID is page 0, byte 0. + * QSFP28 is of type SFF_8636, however, this is treated + * the same by ethtool, so we can also treat them the same. */ switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { - case 0x3: + case 0x3: /* SFP */ return MC_CMD_MEDIA_SFP_PLUS; - case 0xc: - case 0xd: + case 0xc: /* QSFP */ + case 0xd: /* QSFP+ */ + case 0x11: /* QSFP28 */ return MC_CMD_MEDIA_QSFP_PLUS; default: return 0; @@ -1075,7 +1078,7 @@ int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *mo case MC_CMD_MEDIA_QSFP_PLUS: modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; break; default: From 4f163bf82b0244bf4d1e9a6b0f4cf4e90b42496e Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 28 Apr 2023 17:43:21 -0400 Subject: [PATCH 28/69] net: atlantic: Define aq_pm_ops conditionally on CONFIG_PM For s390, gcc with W=1 reports drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c:458:32: error: 'aq_pm_ops' defined but not used [-Werror=unused-const-variable=] 458 | static const struct dev_pm_ops aq_pm_ops = { | ^~~~~~~~~ The only use of aq_pm_ops is conditional on CONFIG_PM. The definition of aq_pm_ops and its functions should also be conditional on CONFIG_PM. Signed-off-by: Tom Rix Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 8647125d60ae..baa5f8cc31f2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -379,6 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } +#ifdef CONFIG_PM static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -463,6 +464,7 @@ static const struct dev_pm_ops aq_pm_ops = { .restore = aq_pm_resume_restore, .thaw = aq_pm_thaw, }; +#endif static struct pci_driver aq_pci_ops = { .name = AQ_CFG_DRV_NAME, From 0d098d83c5d9e107b2df7f5e11f81492f56d2fe7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Apr 2023 21:27:54 +0100 Subject: [PATCH 29/69] rxrpc: Fix hard call timeout units The hard call timeout is specified in the RXRPC_SET_CALL_TIMEOUT cmsg in seconds, so fix the point at which sendmsg() applies it to the call to convert to jiffies from seconds, not milliseconds. Fixes: a158bdd3247b ("rxrpc: Fix timeout of a call that hasn't yet been granted a channel") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6caa47d352ed..7498a77b5d39 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -699,7 +699,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) fallthrough; case 1: if (p.call.timeouts.hard > 0) { - j = msecs_to_jiffies(p.call.timeouts.hard); + j = p.call.timeouts.hard * HZ; now = jiffies; j += now; WRITE_ONCE(call->expect_term_by, j); From 0eb362d254814ce04848730bf32e75b8ee1a4d6c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Apr 2023 21:27:55 +0100 Subject: [PATCH 30/69] rxrpc: Make it so that a waiting process can be aborted When sendmsg() creates an rxrpc call, it queues it to wait for a connection and channel to be assigned and then waits before it can start shovelling data as the encrypted DATA packet content includes a summary of the connection parameters. However, sendmsg() may get interrupted before a connection gets assigned and further sendmsg() calls will fail with EBUSY until an assignment is made. Fix this so that the call can at least be aborted without failing on EBUSY. We have to be careful here as sendmsg() mustn't be allowed to start the call timer if the call doesn't yet have a connection assigned as an oops may follow shortly thereafter. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Marc Dionne Signed-off-by: David Howells cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7498a77b5d39..c1b074c17b33 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -656,10 +656,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto out_put_unlock; } else { switch (rxrpc_call_state(call)) { - case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: + if (p.command == RXRPC_CMD_SEND_ABORT) + break; + fallthrough; + case RXRPC_CALL_UNINITIALISED: + case RXRPC_CALL_SERVER_PREALLOC: rxrpc_put_call(call, rxrpc_call_put_sendmsg); ret = -EBUSY; goto error_release_sock; From db099c625b13a74d462521a46d98a8ce5b53af5d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Apr 2023 21:27:56 +0100 Subject: [PATCH 31/69] rxrpc: Fix timeout of a call that hasn't yet been granted a channel afs_make_call() calls rxrpc_kernel_begin_call() to begin a call (which may get stalled in the background waiting for a connection to become available); it then calls rxrpc_kernel_set_max_life() to set the timeouts - but that starts the call timer so the call timer might then expire before we get a connection assigned - leading to the following oops if the call stalled: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... CPU: 1 PID: 5111 Comm: krxrpcio/0 Not tainted 6.3.0-rc7-build3+ #701 RIP: 0010:rxrpc_alloc_txbuf+0xc0/0x157 ... Call Trace: rxrpc_send_ACK+0x50/0x13b rxrpc_input_call_event+0x16a/0x67d rxrpc_io_thread+0x1b6/0x45f ? _raw_spin_unlock_irqrestore+0x1f/0x35 ? rxrpc_input_packet+0x519/0x519 kthread+0xe7/0xef ? kthread_complete_and_exit+0x1b/0x1b ret_from_fork+0x22/0x30 Fix this by noting the timeouts in struct rxrpc_call when the call is created. The timer will be started when the first packet is transmitted. It shouldn't be possible to trigger this directly from userspace through AF_RXRPC as sendmsg() will return EBUSY if the call is in the waiting-for-conn state if it dropped out of the wait due to a signal. Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Reported-by: Marc Dionne Signed-off-by: David Howells cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- fs/afs/afs.h | 4 ++-- fs/afs/internal.h | 2 +- fs/afs/rxrpc.c | 8 +++----- include/net/af_rxrpc.h | 21 +++++++++++---------- net/rxrpc/af_rxrpc.c | 3 +++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 9 ++++++++- net/rxrpc/sendmsg.c | 1 + 8 files changed, 30 insertions(+), 19 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 432cb4b23961..81815724db6c 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -19,8 +19,8 @@ #define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ #define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ -#define AFS_VL_MAX_LIFESPAN (120 * HZ) -#define AFS_PROBE_MAX_LIFESPAN (30 * HZ) +#define AFS_VL_MAX_LIFESPAN 120 +#define AFS_PROBE_MAX_LIFESPAN 30 typedef u64 afs_volid_t; typedef u64 afs_vnodeid_t; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ad8523d0d038..68ae91d21b57 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -128,7 +128,7 @@ struct afs_call { spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ - unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */ + unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned count2; /* count used in unmarshalling */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index e08b850c3e6d..ed1644e7683f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -335,7 +335,9 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, (unsigned long)call, - tx_total_len, gfp, + tx_total_len, + call->max_lifespan, + gfp, (call->async ? afs_wake_up_async_call : afs_wake_up_call_waiter), @@ -350,10 +352,6 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) } call->rxcall = rxcall; - - if (call->max_lifespan) - rxrpc_kernel_set_max_life(call->net->socket, rxcall, - call->max_lifespan); call->issue_time = ktime_get_real(); /* send the request */ diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 01a35e113ab9..5531dd08061e 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -40,16 +40,17 @@ typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); void rxrpc_kernel_new_call_notification(struct socket *, rxrpc_notify_new_call_t, rxrpc_discard_new_call_t); -struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, - struct sockaddr_rxrpc *, - struct key *, - unsigned long, - s64, - gfp_t, - rxrpc_notify_rx_t, - bool, - enum rxrpc_interruptibility, - unsigned int); +struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, + struct sockaddr_rxrpc *srx, + struct key *key, + unsigned long user_call_ID, + s64 tx_total_len, + u32 hard_timeout, + gfp_t gfp, + rxrpc_notify_rx_t notify_rx, + bool upgrade, + enum rxrpc_interruptibility interruptibility, + unsigned int debug_id); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c32b164206f9..31f738d65f1c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -265,6 +265,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use * @tx_total_len: Total length of data to transmit during the call (or -1) + * @hard_timeout: The maximum lifespan of the call in sec * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call @@ -283,6 +284,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct key *key, unsigned long user_call_ID, s64 tx_total_len, + u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, bool upgrade, @@ -313,6 +315,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, p.tx_total_len = tx_total_len; p.interruptibility = interruptibility; p.kernel = true; + p.timeouts.hard = hard_timeout; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 67b0a894162d..5d44dc08f66d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -616,6 +616,7 @@ struct rxrpc_call { unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + u32 hard_timo; /* Maximum lifetime or 0 (jif) */ struct timer_list timer; /* Combined event timer */ struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index e9f1f49d18c2..fecbc73054bc 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -226,6 +226,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, if (cp->exclusive) __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + if (p->timeouts.normal) + call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL); + if (p->timeouts.idle) + call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL); + if (p->timeouts.hard) + call->hard_timo = p->timeouts.hard * HZ; + ret = rxrpc_init_client_call_security(call); if (ret < 0) { rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); @@ -257,7 +264,7 @@ void rxrpc_start_call_timer(struct rxrpc_call *call) call->keepalive_at = j; call->expect_rx_by = j; call->expect_req_by = j; - call->expect_term_by = j; + call->expect_term_by = j + call->hard_timo; call->timer.expires = now; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index c1b074c17b33..8e0b94714e84 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -651,6 +651,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ + p.call.nr_timeouts = 0; ret = 0; if (rxrpc_call_is_complete(call)) goto out_put_unlock; From c6d96df9fa2c1d19525239d4262889cce594ce6c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 26 Apr 2023 19:21:53 +0200 Subject: [PATCH 32/69] net: ethernet: mtk_eth_soc: drop generic vlan rx offload, only use DSA untagging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Through testing I found out that hardware vlan rx offload support seems to have some hardware issues. At least when using multiple MACs and when receiving tagged packets on the secondary MAC, the hardware can sometimes start to emit wrong tags on the first MAC as well. In order to avoid such issues, drop the feature configuration and use the offload feature only for DSA hardware untagging on MT7621/MT7622 devices where this feature works properly. Fixes: 08666cbb7dd5 ("net: ethernet: mtk_eth_soc: add support for configuring vlan rx offload") Tested-by: Frank Wunderlich Signed-off-by: Felix Fietkau Signed-off-by: Frank Wunderlich Tested-by: Arınç ÜNAL Acked-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230426172153.8352-1-linux@fw-web.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 106 ++++++++------------ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 - 2 files changed, 40 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9e948d091a69..a75fd072082c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1918,9 +1918,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, while (done < budget) { unsigned int pktlen, *rxdcsum; - bool has_hwaccel_tag = false; struct net_device *netdev; - u16 vlan_proto, vlan_tci; dma_addr_t dma_addr; u32 hash, reason; int mac = 0; @@ -2055,31 +2053,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, netdev); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { - if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { - if (trxd.rxd3 & RX_DMA_VTAG_V2) { - vlan_proto = RX_DMA_VPID(trxd.rxd4); - vlan_tci = RX_DMA_VID(trxd.rxd4); - has_hwaccel_tag = true; - } - } else if (trxd.rxd2 & RX_DMA_VTAG) { - vlan_proto = RX_DMA_VPID(trxd.rxd3); - vlan_tci = RX_DMA_VID(trxd.rxd3); - has_hwaccel_tag = true; - } - } - /* When using VLAN untagging in combination with DSA, the * hardware treats the MTK special tag as a VLAN and untags it. */ - if (has_hwaccel_tag && netdev_uses_dsa(netdev)) { - unsigned int port = vlan_proto & GENMASK(2, 0); + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) && + (trxd.rxd2 & RX_DMA_VTAG) && netdev_uses_dsa(netdev)) { + unsigned int port = RX_DMA_VPID(trxd.rxd3) & GENMASK(2, 0); if (port < ARRAY_SIZE(eth->dsa_meta) && eth->dsa_meta[port]) skb_dst_set_noref(skb, ð->dsa_meta[port]->dst); - } else if (has_hwaccel_tag) { - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan_tci); } if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) @@ -2907,29 +2890,11 @@ static netdev_features_t mtk_fix_features(struct net_device *dev, static int mtk_set_features(struct net_device *dev, netdev_features_t features) { - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; netdev_features_t diff = dev->features ^ features; - int i; if ((diff & NETIF_F_LRO) && !(features & NETIF_F_LRO)) mtk_hwlro_netdev_disable(dev); - /* Set RX VLAN offloading */ - if (!(diff & NETIF_F_HW_VLAN_CTAG_RX)) - return 0; - - mtk_w32(eth, !!(features & NETIF_F_HW_VLAN_CTAG_RX), - MTK_CDMP_EG_CTRL); - - /* sync features with other MAC */ - for (i = 0; i < MTK_MAC_COUNT; i++) { - if (!eth->netdev[i] || eth->netdev[i] == dev) - continue; - eth->netdev[i]->features &= ~NETIF_F_HW_VLAN_CTAG_RX; - eth->netdev[i]->features |= features & NETIF_F_HW_VLAN_CTAG_RX; - } - return 0; } @@ -3247,30 +3212,6 @@ static int mtk_open(struct net_device *dev) struct mtk_eth *eth = mac->hw; int i, err; - if (mtk_uses_dsa(dev) && !eth->prog) { - for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) { - struct metadata_dst *md_dst = eth->dsa_meta[i]; - - if (md_dst) - continue; - - md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, - GFP_KERNEL); - if (!md_dst) - return -ENOMEM; - - md_dst->u.port_info.port_id = i; - eth->dsa_meta[i] = md_dst; - } - } else { - /* Hardware special tag parsing needs to be disabled if at least - * one MAC does not use DSA. - */ - u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL); - val &= ~MTK_CDMP_STAG_EN; - mtk_w32(eth, val, MTK_CDMP_IG_CTRL); - } - err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0); if (err) { netdev_err(dev, "%s: could not attach PHY: %d\n", __func__, @@ -3309,6 +3250,40 @@ static int mtk_open(struct net_device *dev) phylink_start(mac->phylink); netif_tx_start_all_queues(dev); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + return 0; + + if (mtk_uses_dsa(dev) && !eth->prog) { + for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) { + struct metadata_dst *md_dst = eth->dsa_meta[i]; + + if (md_dst) + continue; + + md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!md_dst) + return -ENOMEM; + + md_dst->u.port_info.port_id = i; + eth->dsa_meta[i] = md_dst; + } + } else { + /* Hardware special tag parsing needs to be disabled if at least + * one MAC does not use DSA. + */ + u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL); + + val &= ~MTK_CDMP_STAG_EN; + mtk_w32(eth, val, MTK_CDMP_IG_CTRL); + + val = mtk_r32(eth, MTK_CDMQ_IG_CTRL); + val &= ~MTK_CDMQ_STAG_EN; + mtk_w32(eth, val, MTK_CDMQ_IG_CTRL); + + mtk_w32(eth, 0, MTK_CDMP_EG_CTRL); + } + return 0; } @@ -3793,10 +3768,9 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { val = mtk_r32(eth, MTK_CDMP_IG_CTRL); mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL); - } - /* Enable RX VLan Offloading */ - mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); + mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); + } /* set interrupt delays based on current Net DIM sample */ mtk_dim_rx(ð->rx_dim.work); @@ -4453,7 +4427,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->hw_features |= NETIF_F_LRO; eth->netdev[id]->vlan_features = eth->soc->hw_features & - ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); + ~NETIF_F_HW_VLAN_CTAG_TX; eth->netdev[id]->features |= eth->soc->hw_features; eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index cdcf8534283e..707445f6bcb1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -48,7 +48,6 @@ #define MTK_HW_FEATURES (NETIF_F_IP_CSUM | \ NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_TX | \ - NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ NETIF_F_IPV6_CSUM |\ From 8509f62b0b07ae8d6dec5aa9613ab1b250ff632f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 25 Apr 2023 16:50:32 +0200 Subject: [PATCH 33/69] netfilter: nf_tables: hit ENOENT on unexisting chain/flowtable update with missing attributes If user does not specify hook number and priority, then assume this is a chain/flowtable update. Therefore, report ENOENT which provides a better hint than EINVAL. Set on extended netlink error report to refer to the chain name. Fixes: 5b6743fb2c2a ("netfilter: nf_tables: skip flowtable hooknum and priority on device updates") Fixes: 5efe72698a97 ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 09542951656c..8b6c61a2196c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2075,8 +2075,10 @@ static int nft_chain_parse_hook(struct net *net, if (!basechain) { if (!ha[NFTA_HOOK_HOOKNUM] || - !ha[NFTA_HOOK_PRIORITY]) - return -EINVAL; + !ha[NFTA_HOOK_PRIORITY]) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); + return -ENOENT; + } hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); @@ -7693,7 +7695,7 @@ static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX }; static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, - const struct nlattr *attr, + const struct nlattr * const nla[], struct nft_flowtable_hook *flowtable_hook, struct nft_flowtable *flowtable, struct netlink_ext_ack *extack, bool add) @@ -7705,15 +7707,18 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, INIT_LIST_HEAD(&flowtable_hook->list); - err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, + err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, + nla[NFTA_FLOWTABLE_HOOK], nft_flowtable_hook_policy, NULL); if (err < 0) return err; if (add) { if (!tb[NFTA_FLOWTABLE_HOOK_NUM] || - !tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) - return -EINVAL; + !tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); + return -ENOENT; + } hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); if (hooknum != NF_NETDEV_INGRESS) @@ -7898,8 +7903,8 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, u32 flags; int err; - err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, extack, false); + err = nft_flowtable_parse_hook(ctx, nla, &flowtable_hook, flowtable, + extack, false); if (err < 0) return err; @@ -8044,8 +8049,8 @@ static int nf_tables_newflowtable(struct sk_buff *skb, if (err < 0) goto err3; - err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, extack, true); + err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, + extack, true); if (err < 0) goto err4; @@ -8107,8 +8112,8 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, struct nft_trans *trans; int err; - err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, extack, false); + err = nft_flowtable_parse_hook(ctx, nla, &flowtable_hook, flowtable, + extack, false); if (err < 0) return err; From de4773f0235acf74554f6a64ea60adc0d7b01895 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 25 Apr 2023 22:11:39 +0100 Subject: [PATCH 34/69] selftests: netfilter: fix libmnl pkg-config usage 1. Don't hard-code pkg-config 2. Remove distro-specific default for CFLAGS 3. Use pkg-config for LDLIBS Fixes: a50a88f026fb ("selftests: netfilter: fix a build error on openSUSE") Suggested-by: Jan Engelhardt Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4504ee07be08..3686bfa6c58d 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -8,8 +8,11 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh -CFLAGS += $(shell pkg-config --cflags libmnl 2>/dev/null || echo "-I/usr/include/libmnl") -LDLIBS = -lmnl +HOSTPKG_CONFIG := pkg-config + +CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) +LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) + TEST_GEN_FILES = nf-queue connect_close include ../lib.mk From c1592a89942e9678f7d9c8030efa777c0d57edab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 May 2023 10:25:24 +0200 Subject: [PATCH 35/69] netfilter: nf_tables: deactivate anonymous set from preparation phase Toggle deleted anonymous sets as inactive in the next generation, so users cannot perform any update on it. Clear the generation bitmask in case the transaction is aborted. The following KASAN splat shows a set element deletion for a bound anonymous set that has been already removed in the same transaction. [ 64.921510] ================================================================== [ 64.923123] BUG: KASAN: wild-memory-access in nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.924745] Write of size 8 at addr dead000000000122 by task test/890 [ 64.927903] CPU: 3 PID: 890 Comm: test Not tainted 6.3.0+ #253 [ 64.931120] Call Trace: [ 64.932699] [ 64.934292] dump_stack_lvl+0x33/0x50 [ 64.935908] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.937551] kasan_report+0xda/0x120 [ 64.939186] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.940814] nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.942452] ? __kasan_slab_alloc+0x2d/0x60 [ 64.944070] ? nf_tables_setelem_notify+0x190/0x190 [nf_tables] [ 64.945710] ? kasan_set_track+0x21/0x30 [ 64.947323] nfnetlink_rcv_batch+0x709/0xd90 [nfnetlink] [ 64.948898] ? nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 12 ++++++++++++ net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_objref.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3ed21d2d5659..2e24ea1d744c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -619,6 +619,7 @@ struct nft_set_binding { }; enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8b6c61a2196c..59fb8320ab4d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5127,12 +5127,24 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + set->use++; +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + set->use--; return; case NFT_TRANS_ABORT: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 274579b1696e..bd19c7aec92e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -342,7 +342,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index cecf8ab90e58..03ef4fdaa460 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -167,7 +167,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index cb37169608ba..a48dd5b5d45b 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -185,7 +185,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, From 048486f81d01db4d100af021ee2ea211d19732a0 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 3 May 2023 12:39:34 +0530 Subject: [PATCH 36/69] octeontx2-af: Secure APR table update with the lock APR table contains the lmtst base address of PF/VFs. These entries are updated by the PF/VF during the device probe. The lmtst address is fetched from HW using "TXN_REQ" and "ADDR_RSP_STS" registers. The lock tries to protect these registers from getting overwritten when multiple PFs invokes rvu_get_lmtaddr() simultaneously. For example, if PF1 submit the request and got permitted before it reads the response and PF2 got scheduled submit the request then the response of PF1 is overwritten by the PF2 response. Fixes: 893ae97214c3 ("octeontx2-af: cn10k: Support configurable LMTST regions") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 4ad9ff025c96..0e74c5a2231e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -60,13 +60,14 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, u64 iova, u64 *lmt_addr) { u64 pa, val, pf; - int err; + int err = 0; if (!iova) { dev_err(rvu->dev, "%s Requested Null address for transulation\n", __func__); return -EINVAL; } + mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); pf = rvu_get_pf(pcifunc) & 0x1F; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | @@ -76,12 +77,13 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, err = rvu_poll_reg(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS, BIT_ULL(0), false); if (err) { dev_err(rvu->dev, "%s LMTLINE iova transulation failed\n", __func__); - return err; + goto exit; } val = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_RSP_STS); if (val & ~0x1ULL) { dev_err(rvu->dev, "%s LMTLINE iova transulation failed err:%llx\n", __func__, val); - return -EIO; + err = -EIO; + goto exit; } /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT0[57:18] * PA[11:0] = IOVA[11:0] @@ -89,8 +91,9 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT0) >> 18; pa &= GENMASK_ULL(39, 0); *lmt_addr = (pa << 12) | (iova & 0xFFF); - - return 0; +exit: + mutex_unlock(&rvu->rsrc_lock); + return err; } static int rvu_update_lmtaddr(struct rvu *rvu, u16 pcifunc, u64 lmt_addr) From c60a6b90e7890453f09e0d2163d6acadabe3415b Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:35 +0530 Subject: [PATCH 37/69] octeontx2-af: Fix start and end bit for scan config In the current driver, NPC exact match feature was not getting enabled as configured bit was not read properly. for_each_set_bit_from() need end bit as one bit post position in the bit map to read NPC exact nibble enable bits properly. This patch fixes the same. Fixes: b747923afff8 ("octeontx2-af: Exact match support") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 006beb5cf98d..f15efd41972e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -594,8 +594,7 @@ static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) */ masked_cfg = cfg & NPC_EXACT_NIBBLE; bitnr = NPC_EXACT_NIBBLE_START; - for_each_set_bit_from(bitnr, (unsigned long *)&masked_cfg, - NPC_EXACT_NIBBLE_START) { + for_each_set_bit_from(bitnr, (unsigned long *)&masked_cfg, NPC_EXACT_NIBBLE_END + 1) { npc_scan_exact_result(mcam, bitnr, key_nibble, intf); key_nibble++; } From 60999cb83554ebcf6cfff8894bc2c3d99ea858ba Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:36 +0530 Subject: [PATCH 38/69] octeontx2-af: Fix depth of cam and mem table. In current driver, NPC cam and mem table sizes are read from wrong register offset. This patch fixes the register offset so that correct values are populated on read. Fixes: b747923afff8 ("octeontx2-af: Exact match support") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 20ebb9c95c73..6597af84aa36 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1868,9 +1868,9 @@ int rvu_npc_exact_init(struct rvu *rvu) rvu->hw->table = table; /* Read table size, ways and depth */ - table->mem_table.depth = FIELD_GET(GENMASK_ULL(31, 24), npc_const3); table->mem_table.ways = FIELD_GET(GENMASK_ULL(19, 16), npc_const3); - table->cam_table.depth = FIELD_GET(GENMASK_ULL(15, 0), npc_const3); + table->mem_table.depth = FIELD_GET(GENMASK_ULL(15, 0), npc_const3); + table->cam_table.depth = FIELD_GET(GENMASK_ULL(31, 24), npc_const3); dev_dbg(rvu->dev, "%s: NPC exact match 4way_2k table(ways=%d, depth=%d)\n", __func__, table->mem_table.ways, table->cam_table.depth); From 2a6eecc592b4d59a04d513aa25fc0f30d52100cd Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:37 +0530 Subject: [PATCH 39/69] octeontx2-pf: Increase the size of dmac filter flows CN10kb supports large number of dmac filter flows to be inserted. Increase the field size to accommodate the same Fixes: b747923afff8 ("octeontx2-af: Exact match support") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index f42b2b65bfd7..0c8fc66ade82 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -335,11 +335,11 @@ struct otx2_flow_config { #define OTX2_PER_VF_VLAN_FLOWS 2 /* Rx + Tx per VF */ #define OTX2_VF_VLAN_RX_INDEX 0 #define OTX2_VF_VLAN_TX_INDEX 1 - u16 max_flows; - u8 dmacflt_max_flows; u32 *bmap_to_dmacindex; unsigned long *dmacflt_bmap; struct list_head flow_list; + u32 dmacflt_max_flows; + u16 max_flows; }; struct otx2_tc_info { From cb5edce271764524b88b1a6866b3e626686d9a33 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 3 May 2023 12:39:38 +0530 Subject: [PATCH 40/69] octeontx2-af: Add validation for lmac type Upon physical link change, firmware reports to the kernel about the change along with the details like speed, lmac_type_id, etc. Kernel derives lmac_type based on lmac_type_id received from firmware. In a few scenarios, firmware returns an invalid lmac_type_id, which is resulting in below kernel panic. This patch adds the missing validation of the lmac_type_id field. Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 35.321595] Modules linked in: [ 35.328982] CPU: 0 PID: 31 Comm: kworker/0:1 Not tainted 5.4.210-g2e3169d8e1bc-dirty #17 [ 35.337014] Hardware name: Marvell CN103XX board (DT) [ 35.344297] Workqueue: events work_for_cpu_fn [ 35.352730] pstate: 40400089 (nZcv daIf +PAN -UAO) [ 35.360267] pc : strncpy+0x10/0x30 [ 35.366595] lr : cgx_link_change_handler+0x90/0x180 Fixes: 61071a871ea6 ("octeontx2-af: Forward CGX link notifications to PFs") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 724df6398bbe..bd77152bb8d7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1231,6 +1231,14 @@ static inline void link_status_user_format(u64 lstat, linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat); + + if (linfo->lmac_type_id >= LMAC_MODE_MAX) { + dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", + linfo->lmac_type_id, cgx->cgx_id, lmac_id); + strncpy(linfo->lmac_type, "Unknown", LMACTYPE_STR_LEN - 1); + return; + } + lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); } From 2075bf150ddf320df02c05e242774dc0f73be1a1 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Wed, 3 May 2023 12:39:39 +0530 Subject: [PATCH 41/69] octeontx2-af: Update correct mask to filter IPv4 fragments During the initial design, the IPv4 ip_flag mask was set to 0xff. Which results to filter only fragmets with (fragment_offset == 0). As part of the fix, updated the mask to 0x20 to filter all the fragmented packets irrespective of the fragment_offset value. Fixes: c672e3727989 ("octeontx2-pf: Add support to filter packet based on IP fragment") Signed-off-by: Suman Ghosh Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 044cc211424e..8392f63e433f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -544,7 +544,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { if (ntohs(flow_spec->etype) == ETH_P_IP) { flow_spec->ip_flag = IPV4_FLAG_MORE; - flow_mask->ip_flag = 0xff; + flow_mask->ip_flag = IPV4_FLAG_MORE; req->features |= BIT_ULL(NPC_IPFRAG_IPV4); } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) { flow_spec->next_header = IPPROTO_FRAGMENT; From 406bed11fb91a0b35c26fe633d8700febaec6439 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:40 +0530 Subject: [PATCH 42/69] octeontx2-af: Update/Fix NPC field hash extract feature 1. As per previous implementation, mask and control parameter to generate the field hash value was not passed to the caller program. Updated the secret key mbox to share that information as well, as a part of the fix. 2. Earlier implementation did not consider hash reduction of both source and destination IPv6 addresses. Only source IPv6 address was considered. This fix solves that and provides option to hash Fixes: 56d9f5fd2246 ("octeontx2-af: Use hashed field in MCAM key") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 16 +++++--- .../marvell/octeontx2/af/rvu_npc_hash.c | 37 ++++++++++++------- .../marvell/octeontx2/af/rvu_npc_hash.h | 6 +++ 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 8fb5cae7285b..4c1e374bb376 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -245,9 +245,9 @@ M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule, \ M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats, \ npc_mcam_get_stats_req, \ npc_mcam_get_stats_rsp) \ -M(NPC_GET_SECRET_KEY, 0x6013, npc_get_secret_key, \ - npc_get_secret_key_req, \ - npc_get_secret_key_rsp) \ +M(NPC_GET_FIELD_HASH_INFO, 0x6013, npc_get_field_hash_info, \ + npc_get_field_hash_info_req, \ + npc_get_field_hash_info_rsp) \ M(NPC_GET_FIELD_STATUS, 0x6014, npc_get_field_status, \ npc_get_field_status_req, \ npc_get_field_status_rsp) \ @@ -1524,14 +1524,20 @@ struct npc_mcam_get_stats_rsp { u8 stat_ena; /* enabled */ }; -struct npc_get_secret_key_req { +struct npc_get_field_hash_info_req { struct mbox_msghdr hdr; u8 intf; }; -struct npc_get_secret_key_rsp { +struct npc_get_field_hash_info_rsp { struct mbox_msghdr hdr; u64 secret_key[3]; +#define NPC_MAX_HASH 2 +#define NPC_MAX_HASH_MASK 2 + /* NPC_AF_INTF(0..1)_HASH(0..1)_MASK(0..1) */ + u64 hash_mask[NPC_MAX_INTF][NPC_MAX_HASH][NPC_MAX_HASH_MASK]; + /* NPC_AF_INTF(0..1)_HASH(0..1)_RESULT_CTRL */ + u64 hash_ctrl[NPC_MAX_INTF][NPC_MAX_HASH]; }; enum ptp_op { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 6597af84aa36..68f813040363 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -110,8 +110,8 @@ static u64 npc_update_use_hash(int lt, int ld) * in KEX_LD_CFG */ cfg = KEX_LD_CFG_USE_HASH(0x1, 0x03, - ld ? 0x8 : 0x18, - 0x1, 0x0, 0x10); + ld ? 0x18 : 0x8, + 0x1, 0x0, ld ? 0x14 : 0x10); break; } @@ -134,7 +134,6 @@ static void npc_program_mkex_hash_rx(struct rvu *rvu, int blkaddr, if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][ld]) { u64 cfg = npc_update_use_hash(lt, ld); - hash_cnt++; if (hash_cnt == NPC_MAX_HASH) return; @@ -149,6 +148,8 @@ static void npc_program_mkex_hash_rx(struct rvu *rvu, int blkaddr, mkex_hash->hash_mask[intf][ld][1]); SET_KEX_LD_HASH_CTRL(intf, ld, mkex_hash->hash_ctrl[intf][ld]); + + hash_cnt++; } } } @@ -171,7 +172,6 @@ static void npc_program_mkex_hash_tx(struct rvu *rvu, int blkaddr, if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][ld]) { u64 cfg = npc_update_use_hash(lt, ld); - hash_cnt++; if (hash_cnt == NPC_MAX_HASH) return; @@ -187,8 +187,6 @@ static void npc_program_mkex_hash_tx(struct rvu *rvu, int blkaddr, SET_KEX_LD_HASH_CTRL(intf, ld, mkex_hash->hash_ctrl[intf][ld]); hash_cnt++; - if (hash_cnt == NPC_MAX_HASH) - return; } } } @@ -238,8 +236,8 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, struct flow_msg *omask) { struct npc_mcam_kex_hash *mkex_hash = rvu->kpu.mkex_hash; - struct npc_get_secret_key_req req; - struct npc_get_secret_key_rsp rsp; + struct npc_get_field_hash_info_req req; + struct npc_get_field_hash_info_rsp rsp; u64 ldata[2], cfg; u32 field_hash; u8 hash_idx; @@ -250,7 +248,7 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, } req.intf = intf; - rvu_mbox_handler_npc_get_secret_key(rvu, &req, &rsp); + rvu_mbox_handler_npc_get_field_hash_info(rvu, &req, &rsp); for (hash_idx = 0; hash_idx < NPC_MAX_HASH; hash_idx++) { cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_HASHX_CFG(intf, hash_idx)); @@ -311,13 +309,13 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, } } -int rvu_mbox_handler_npc_get_secret_key(struct rvu *rvu, - struct npc_get_secret_key_req *req, - struct npc_get_secret_key_rsp *rsp) +int rvu_mbox_handler_npc_get_field_hash_info(struct rvu *rvu, + struct npc_get_field_hash_info_req *req, + struct npc_get_field_hash_info_rsp *rsp) { u64 *secret_key = rsp->secret_key; u8 intf = req->intf; - int blkaddr; + int i, j, blkaddr; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) { @@ -329,6 +327,19 @@ int rvu_mbox_handler_npc_get_secret_key(struct rvu *rvu, secret_key[1] = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_SECRET_KEY1(intf)); secret_key[2] = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_SECRET_KEY2(intf)); + for (i = 0; i < NPC_MAX_HASH; i++) { + for (j = 0; j < NPC_MAX_HASH_MASK; j++) { + rsp->hash_mask[NIX_INTF_RX][i][j] = + GET_KEX_LD_HASH_MASK(NIX_INTF_RX, i, j); + rsp->hash_mask[NIX_INTF_TX][i][j] = + GET_KEX_LD_HASH_MASK(NIX_INTF_TX, i, j); + } + } + + for (i = 0; i < NPC_MAX_INTF; i++) + for (j = 0; j < NPC_MAX_HASH; j++) + rsp->hash_ctrl[i][j] = GET_KEX_LD_HASH_CTRL(i, j); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h index 3efeb09c58de..65936f4aeaac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h @@ -31,6 +31,12 @@ rvu_write64(rvu, blkaddr, \ NPC_AF_INTFX_HASHX_MASKX(intf, ld, mask_idx), cfg) +#define GET_KEX_LD_HASH_CTRL(intf, ld) \ + rvu_read64(rvu, blkaddr, NPC_AF_INTFX_HASHX_RESULT_CTRL(intf, ld)) + +#define GET_KEX_LD_HASH_MASK(intf, ld, mask_idx) \ + rvu_read64(rvu, blkaddr, NPC_AF_INTFX_HASHX_MASKX(intf, ld, mask_idx)) + #define SET_KEX_LD_HASH_CTRL(intf, ld, cfg) \ rvu_write64(rvu, blkaddr, \ NPC_AF_INTFX_HASHX_RESULT_CTRL(intf, ld), cfg) From f66155905959076619c9c519fb099e8ae6cb6f7b Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:41 +0530 Subject: [PATCH 43/69] octeontx2-af: Fix issues with NPC field hash extract 1. Allow field hash configuration for both source and destination IPv6. 2. Configure hardware parser based on hash extract feature enable flag for IPv6. 3. Fix IPv6 endianness issue while updating the source/destination IP address via ntuple rule. Fixes: 56d9f5fd2246 ("octeontx2-af: Use hashed field in MCAM key") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../marvell/octeontx2/af/rvu_npc_fs.c | 23 +++-- .../marvell/octeontx2/af/rvu_npc_fs.h | 4 + .../marvell/octeontx2/af/rvu_npc_hash.c | 86 ++++++++++--------- .../marvell/octeontx2/af/rvu_npc_hash.h | 4 +- 4 files changed, 68 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index f15efd41972e..952319453701 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -13,11 +13,6 @@ #include "rvu_npc_fs.h" #include "rvu_npc_hash.h" -#define NPC_BYTESM GENMASK_ULL(19, 16) -#define NPC_HDR_OFFSET GENMASK_ULL(15, 8) -#define NPC_KEY_OFFSET GENMASK_ULL(5, 0) -#define NPC_LDATA_EN BIT_ULL(7) - static const char * const npc_flow_names[] = { [NPC_DMAC] = "dmac", [NPC_SMAC] = "smac", @@ -442,6 +437,7 @@ done: static void npc_scan_ldata(struct rvu *rvu, int blkaddr, u8 lid, u8 lt, u64 cfg, u8 intf) { + struct npc_mcam_kex_hash *mkex_hash = rvu->kpu.mkex_hash; struct npc_mcam *mcam = &rvu->hw->mcam; u8 hdr, key, nr_bytes, bit_offset; u8 la_ltype, la_start; @@ -490,8 +486,21 @@ do { \ NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4); NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4); NPC_SCAN_HDR(NPC_IPFRAG_IPV6, NPC_LID_LC, NPC_LT_LC_IP6_EXT, 6, 1); - NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); - NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 16); + if (rvu->hw->cap.npc_hash_extract) { + if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][0]) + NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 4); + else + NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); + + if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][1]) + NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 4); + else + NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 16); + } else { + NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); + NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 16); + } + NPC_SCAN_HDR(NPC_SPORT_UDP, NPC_LID_LD, NPC_LT_LD_UDP, 0, 2); NPC_SCAN_HDR(NPC_DPORT_UDP, NPC_LID_LD, NPC_LT_LD_UDP, 2, 2); NPC_SCAN_HDR(NPC_SPORT_TCP, NPC_LID_LD, NPC_LT_LD_TCP, 0, 2); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h index bdd65ce56a32..3f5c9042d10e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h @@ -9,6 +9,10 @@ #define __RVU_NPC_FS_H #define IPV6_WORDS 4 +#define NPC_BYTESM GENMASK_ULL(19, 16) +#define NPC_HDR_OFFSET GENMASK_ULL(15, 8) +#define NPC_KEY_OFFSET GENMASK_ULL(5, 0) +#define NPC_LDATA_EN BIT_ULL(7) void npc_update_entry(struct rvu *rvu, enum key_fields type, struct mcam_entry *entry, u64 val_lo, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 68f813040363..51209119f0f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -78,42 +78,43 @@ static u32 rvu_npc_toeplitz_hash(const u64 *data, u64 *key, size_t data_bit_len, return hash_out; } -u32 npc_field_hash_calc(u64 *ldata, struct npc_mcam_kex_hash *mkex_hash, - u64 *secret_key, u8 intf, u8 hash_idx) +u32 npc_field_hash_calc(u64 *ldata, struct npc_get_field_hash_info_rsp rsp, + u8 intf, u8 hash_idx) { u64 hash_key[3]; u64 data_padded[2]; u32 field_hash; - hash_key[0] = secret_key[1] << 31; - hash_key[0] |= secret_key[2]; - hash_key[1] = secret_key[1] >> 33; - hash_key[1] |= secret_key[0] << 31; - hash_key[2] = secret_key[0] >> 33; + hash_key[0] = rsp.secret_key[1] << 31; + hash_key[0] |= rsp.secret_key[2]; + hash_key[1] = rsp.secret_key[1] >> 33; + hash_key[1] |= rsp.secret_key[0] << 31; + hash_key[2] = rsp.secret_key[0] >> 33; - data_padded[0] = mkex_hash->hash_mask[intf][hash_idx][0] & ldata[0]; - data_padded[1] = mkex_hash->hash_mask[intf][hash_idx][1] & ldata[1]; + data_padded[0] = rsp.hash_mask[intf][hash_idx][0] & ldata[0]; + data_padded[1] = rsp.hash_mask[intf][hash_idx][1] & ldata[1]; field_hash = rvu_npc_toeplitz_hash(data_padded, hash_key, 128, 159); - field_hash &= mkex_hash->hash_ctrl[intf][hash_idx] >> 32; - field_hash |= mkex_hash->hash_ctrl[intf][hash_idx]; + field_hash &= FIELD_GET(GENMASK(63, 32), rsp.hash_ctrl[intf][hash_idx]); + field_hash += FIELD_GET(GENMASK(31, 0), rsp.hash_ctrl[intf][hash_idx]); return field_hash; } -static u64 npc_update_use_hash(int lt, int ld) +static u64 npc_update_use_hash(struct rvu *rvu, int blkaddr, + u8 intf, int lid, int lt, int ld) { - u64 cfg = 0; + u8 hdr, key; + u64 cfg; - switch (lt) { - case NPC_LT_LC_IP6: - /* Update use_hash(bit-20) and bytesm1 (bit-16:19) - * in KEX_LD_CFG - */ - cfg = KEX_LD_CFG_USE_HASH(0x1, 0x03, - ld ? 0x18 : 0x8, - 0x1, 0x0, ld ? 0x14 : 0x10); - break; - } + cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf, lid, lt, ld)); + hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); + key = FIELD_GET(NPC_KEY_OFFSET, cfg); + + /* Update use_hash(bit-20) to 'true' and + * bytesm1(bit-16:19) to '0x3' in KEX_LD_CFG + */ + cfg = KEX_LD_CFG_USE_HASH(0x1, 0x03, + hdr, 0x1, 0x0, key); return cfg; } @@ -132,11 +133,13 @@ static void npc_program_mkex_hash_rx(struct rvu *rvu, int blkaddr, for (lt = 0; lt < NPC_MAX_LT; lt++) { for (ld = 0; ld < NPC_MAX_LD; ld++) { if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][ld]) { - u64 cfg = npc_update_use_hash(lt, ld); + u64 cfg; if (hash_cnt == NPC_MAX_HASH) return; + cfg = npc_update_use_hash(rvu, blkaddr, + intf, lid, lt, ld); /* Set updated KEX configuration */ SET_KEX_LD(intf, lid, lt, ld, cfg); /* Set HASH configuration */ @@ -170,11 +173,13 @@ static void npc_program_mkex_hash_tx(struct rvu *rvu, int blkaddr, for (lt = 0; lt < NPC_MAX_LT; lt++) { for (ld = 0; ld < NPC_MAX_LD; ld++) if (mkex_hash->lid_lt_ld_hash_en[intf][lid][lt][ld]) { - u64 cfg = npc_update_use_hash(lt, ld); + u64 cfg; if (hash_cnt == NPC_MAX_HASH) return; + cfg = npc_update_use_hash(rvu, blkaddr, + intf, lid, lt, ld); /* Set updated KEX configuration */ SET_KEX_LD(intf, lid, lt, ld, cfg); /* Set HASH configuration */ @@ -264,44 +269,45 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, * is hashed to 32 bit value. */ case NPC_LT_LC_IP6: - if (features & BIT_ULL(NPC_SIP_IPV6)) { + /* ld[0] == hash_idx[0] == Source IPv6 + * ld[1] == hash_idx[1] == Destination IPv6 + */ + if ((features & BIT_ULL(NPC_SIP_IPV6)) && !hash_idx) { u32 src_ip[IPV6_WORDS]; be32_to_cpu_array(src_ip, pkt->ip6src, IPV6_WORDS); - ldata[0] = (u64)src_ip[0] << 32 | src_ip[1]; - ldata[1] = (u64)src_ip[2] << 32 | src_ip[3]; + ldata[1] = (u64)src_ip[0] << 32 | src_ip[1]; + ldata[0] = (u64)src_ip[2] << 32 | src_ip[3]; field_hash = npc_field_hash_calc(ldata, - mkex_hash, - rsp.secret_key, + rsp, intf, hash_idx); npc_update_entry(rvu, NPC_SIP_IPV6, entry, - field_hash, 0, 32, 0, intf); + field_hash, 0, + GENMASK(31, 0), 0, intf); memcpy(&opkt->ip6src, &pkt->ip6src, sizeof(pkt->ip6src)); memcpy(&omask->ip6src, &mask->ip6src, sizeof(mask->ip6src)); - break; - } - - if (features & BIT_ULL(NPC_DIP_IPV6)) { + } else if ((features & BIT_ULL(NPC_DIP_IPV6)) && hash_idx) { u32 dst_ip[IPV6_WORDS]; be32_to_cpu_array(dst_ip, pkt->ip6dst, IPV6_WORDS); - ldata[0] = (u64)dst_ip[0] << 32 | dst_ip[1]; - ldata[1] = (u64)dst_ip[2] << 32 | dst_ip[3]; + ldata[1] = (u64)dst_ip[0] << 32 | dst_ip[1]; + ldata[0] = (u64)dst_ip[2] << 32 | dst_ip[3]; field_hash = npc_field_hash_calc(ldata, - mkex_hash, - rsp.secret_key, + rsp, intf, hash_idx); npc_update_entry(rvu, NPC_DIP_IPV6, entry, - field_hash, 0, 32, 0, intf); + field_hash, 0, + GENMASK(31, 0), 0, intf); memcpy(&opkt->ip6dst, &pkt->ip6dst, sizeof(pkt->ip6dst)); memcpy(&omask->ip6dst, &mask->ip6dst, sizeof(mask->ip6dst)); } + break; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h index 65936f4aeaac..a1c3d987b804 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h @@ -62,8 +62,8 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, struct flow_msg *omask); void npc_config_secret_key(struct rvu *rvu, int blkaddr); void npc_program_mkex_hash(struct rvu *rvu, int blkaddr); -u32 npc_field_hash_calc(u64 *ldata, struct npc_mcam_kex_hash *mkex_hash, - u64 *secret_key, u8 intf, u8 hash_idx); +u32 npc_field_hash_calc(u64 *ldata, struct npc_get_field_hash_info_rsp rsp, + u8 intf, u8 hash_idx); static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = { .lid_lt_ld_hash_en = { From 5eb1b7220948a69298a436148a735f32ec325289 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 3 May 2023 12:39:42 +0530 Subject: [PATCH 44/69] octeontx2-af: Skip PFs if not enabled Firmware enables PFs and allocate mbox resources for each of the PFs. Currently PF driver configures mbox resources without checking whether PF is enabled or not. This results in crash. This patch fixes this issue by skipping disabled PF's mbox initialization. Fixes: 9bdc47a6e328 ("octeontx2-af: Mbox communication support btw AF and it's VFs") Signed-off-by: Ratheesh Kannoth Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/mbox.c | 5 +- .../net/ethernet/marvell/octeontx2/af/mbox.h | 3 +- .../net/ethernet/marvell/octeontx2/af/rvu.c | 49 +++++++++++++++---- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 2898931d5260..9690ac01f02c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL(otx2_mbox_init); */ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void **hwbase, struct pci_dev *pdev, void *reg_base, - int direction, int ndevs) + int direction, int ndevs, unsigned long *pf_bmap) { struct otx2_mbox_dev *mdev; int devid, err; @@ -169,6 +169,9 @@ int otx2_mbox_regions_init(struct otx2_mbox *mbox, void **hwbase, mbox->hwbase = hwbase[0]; for (devid = 0; devid < ndevs; devid++) { + if (!test_bit(devid, pf_bmap)) + continue; + mdev = &mbox->dev[devid]; mdev->mbase = hwbase[devid]; mdev->hwbase = hwbase[devid]; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4c1e374bb376..6389ed83637d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -96,9 +96,10 @@ void otx2_mbox_destroy(struct otx2_mbox *mbox); int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase, struct pci_dev *pdev, void __force *reg_base, int direction, int ndevs); + int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase, struct pci_dev *pdev, void __force *reg_base, - int direction, int ndevs); + int direction, int ndevs, unsigned long *bmap); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid); int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid); int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 8683ce57ed3f..9f673bda9dbd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2282,7 +2282,7 @@ static inline void rvu_afvf_mbox_up_handler(struct work_struct *work) } static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, - int num, int type) + int num, int type, unsigned long *pf_bmap) { struct rvu_hwinfo *hw = rvu->hw; int region; @@ -2294,6 +2294,9 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, */ if (type == TYPE_AFVF) { for (region = 0; region < num; region++) { + if (!test_bit(region, pf_bmap)) + continue; + if (hw->cap.per_pf_mbox_regs) { bar4 = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFX_BAR4_ADDR(0)) + @@ -2315,6 +2318,9 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, * RVU_AF_PF_BAR4_ADDR register. */ for (region = 0; region < num; region++) { + if (!test_bit(region, pf_bmap)) + continue; + if (hw->cap.per_pf_mbox_regs) { bar4 = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFX_BAR4_ADDR(region)); @@ -2343,12 +2349,33 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, int err = -EINVAL, i, dir, dir_up; void __iomem *reg_base; struct rvu_work *mwork; + unsigned long *pf_bmap; void **mbox_regions; const char *name; + u64 cfg; + + pf_bmap = bitmap_zalloc(num, GFP_KERNEL); + if (!pf_bmap) + return -ENOMEM; + + /* RVU VFs */ + if (type == TYPE_AFVF) + bitmap_set(pf_bmap, 0, num); + + if (type == TYPE_AFPF) { + /* Mark enabled PFs in bitmap */ + for (i = 0; i < num; i++) { + cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(i)); + if (cfg & BIT_ULL(20)) + set_bit(i, pf_bmap); + } + } mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); - if (!mbox_regions) - return -ENOMEM; + if (!mbox_regions) { + err = -ENOMEM; + goto free_bitmap; + } switch (type) { case TYPE_AFPF: @@ -2356,7 +2383,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, dir = MBOX_DIR_AFPF; dir_up = MBOX_DIR_AFPF_UP; reg_base = rvu->afreg_base; - err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFPF); + err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFPF, pf_bmap); if (err) goto free_regions; break; @@ -2365,7 +2392,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, dir = MBOX_DIR_PFVF; dir_up = MBOX_DIR_PFVF_UP; reg_base = rvu->pfreg_base; - err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFVF); + err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFVF, pf_bmap); if (err) goto free_regions; break; @@ -2396,16 +2423,19 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, } err = otx2_mbox_regions_init(&mw->mbox, mbox_regions, rvu->pdev, - reg_base, dir, num); + reg_base, dir, num, pf_bmap); if (err) goto exit; err = otx2_mbox_regions_init(&mw->mbox_up, mbox_regions, rvu->pdev, - reg_base, dir_up, num); + reg_base, dir_up, num, pf_bmap); if (err) goto exit; for (i = 0; i < num; i++) { + if (!test_bit(i, pf_bmap)) + continue; + mwork = &mw->mbox_wrk[i]; mwork->rvu = rvu; INIT_WORK(&mwork->work, mbox_handler); @@ -2414,8 +2444,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, mwork->rvu = rvu; INIT_WORK(&mwork->work, mbox_up_handler); } - kfree(mbox_regions); - return 0; + goto free_regions; exit: destroy_workqueue(mw->mbox_wq); @@ -2424,6 +2453,8 @@ unmap_regions: iounmap((void __iomem *)mbox_regions[num]); free_regions: kfree(mbox_regions); +free_bitmap: + bitmap_free(pf_bmap); return err; } From c926252205c424c4842dbdbe02f8e3296f623204 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 3 May 2023 12:39:43 +0530 Subject: [PATCH 45/69] octeontx2-pf: Disable packet I/O for graceful exit At the stage of enabling packet I/O in otx2_open, If mailbox timeout occurs then interface ends up in down state where as hardware packet I/O is enabled. Hence disable packet I/O also before bailing out. Fixes: 1ea0166da050 ("octeontx2-pf: Fix the device state on error") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a75c944cc739..18284ad75157 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1835,13 +1835,22 @@ int otx2_open(struct net_device *netdev) otx2_dmacflt_reinstall_flows(pf); err = otx2_rxtx_enable(pf, true); - if (err) + /* If a mbox communication error happens at this point then interface + * will end up in a state such that it is in down state but hardware + * mcam entries are enabled to receive the packets. Hence disable the + * packet I/O. + */ + if (err == EIO) + goto err_disable_rxtx; + else if (err) goto err_tx_stop_queues; otx2_do_set_rx_mode(pf); return 0; +err_disable_rxtx: + otx2_rxtx_enable(pf, false); err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); From 99ae1260fdb5f15beab8a3adfb93a9041c87a2c1 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 3 May 2023 12:39:44 +0530 Subject: [PATCH 46/69] octeontx2-vf: Detach LF resources on probe cleanup When a VF device probe fails due to error in MSIX vector allocation then the resources NIX and NPA LFs were not detached. Fix this by detaching the LFs when MSIX vector allocation fails. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index ab126f8706c7..53366dbfbf27 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -621,7 +621,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = otx2vf_realloc_msix_vectors(vf); if (err) - goto err_mbox_destroy; + goto err_detach_rsrc; err = otx2_set_real_num_queues(netdev, qcount, qcount); if (err) From 3711d44fac1f80ea69ecb7315fed05b3812a7401 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 2 May 2023 11:47:40 -0700 Subject: [PATCH 47/69] ionic: remove noise from ethtool rxnfc error msg It seems that ethtool is calling into .get_rxnfc more often with ETHTOOL_GRXCLSRLCNT which ionic doesn't know about. We don't need to log a message about it, just return not supported. Fixes: aa3198819bea6 ("ionic: Add RSS support") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index cf33503468a3..9b2b96fa36af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -794,7 +794,7 @@ static int ionic_get_rxnfc(struct net_device *netdev, info->data = lif->nxqs; break; default: - netdev_err(netdev, "Command parameter %d is not supported\n", + netdev_dbg(netdev, "Command parameter %d is not supported\n", info->cmd); err = -EOPNOTSUPP; } From 0fbd79c01a9a657348f7032df70c57a406468c86 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Tue, 2 May 2023 11:36:27 +0800 Subject: [PATCH 48/69] r8152: fix the autosuspend doesn't work Set supports_autosuspend = 1 for the rtl8152_cfgselector_driver. Fixes: ec51fbd1b8a2 ("r8152: add USB device driver for config selection") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 755b0f72dd44..0999a58ca9d2 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9910,6 +9910,7 @@ static struct usb_device_driver rtl8152_cfgselector_driver = { .probe = rtl8152_cfgselector_probe, .id_table = rtl8152_table, .generic_subclass = 1, + .supports_autosuspend = 1, }; static int __init rtl8152_driver_init(void) From 9ad685dbfe7e856bbf17a7177b64676d324d6ed7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 2 May 2023 15:20:50 +0300 Subject: [PATCH 49/69] ethtool: Fix uninitialized number of lanes It is not possible to set the number of lanes when setting link modes using the legacy IOCTL ethtool interface. Since 'struct ethtool_link_ksettings' is not initialized in this path, drivers receive an uninitialized number of lanes in 'struct ethtool_link_ksettings::lanes'. When this information is later queried from drivers, it results in the ethtool code making decisions based on uninitialized memory, leading to the following KMSAN splat [1]. In practice, this most likely only happens with the tun driver that simply returns whatever it got in the set operation. As far as I can tell, this uninitialized memory is not leaked to user space thanks to the 'ethtool_ops->cap_link_lanes_supported' check in linkmodes_prepare_data(). Fix by initializing the structure in the IOCTL path. Did not find any more call sites that pass an uninitialized structure when calling 'ethtool_ops::set_link_ksettings()'. [1] BUG: KMSAN: uninit-value in ethnl_update_linkmodes net/ethtool/linkmodes.c:273 [inline] BUG: KMSAN: uninit-value in ethnl_set_linkmodes+0x190b/0x19d0 net/ethtool/linkmodes.c:333 ethnl_update_linkmodes net/ethtool/linkmodes.c:273 [inline] ethnl_set_linkmodes+0x190b/0x19d0 net/ethtool/linkmodes.c:333 ethnl_default_set_doit+0x88d/0xde0 net/ethtool/netlink.c:640 genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x141a/0x14c0 net/netlink/genetlink.c:1065 netlink_rcv_skb+0x3f8/0x750 net/netlink/af_netlink.c:2577 genl_rcv+0x40/0x60 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0xf41/0x1270 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x127d/0x1430 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0xa24/0xe40 net/socket.c:2501 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2555 __sys_sendmsg net/socket.c:2584 [inline] __do_sys_sendmsg net/socket.c:2593 [inline] __se_sys_sendmsg net/socket.c:2591 [inline] __x64_sys_sendmsg+0x36b/0x540 net/socket.c:2591 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was stored to memory at: tun_get_link_ksettings+0x37/0x60 drivers/net/tun.c:3544 __ethtool_get_link_ksettings+0x17b/0x260 net/ethtool/ioctl.c:441 ethnl_set_linkmodes+0xee/0x19d0 net/ethtool/linkmodes.c:327 ethnl_default_set_doit+0x88d/0xde0 net/ethtool/netlink.c:640 genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x141a/0x14c0 net/netlink/genetlink.c:1065 netlink_rcv_skb+0x3f8/0x750 net/netlink/af_netlink.c:2577 genl_rcv+0x40/0x60 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0xf41/0x1270 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x127d/0x1430 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0xa24/0xe40 net/socket.c:2501 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2555 __sys_sendmsg net/socket.c:2584 [inline] __do_sys_sendmsg net/socket.c:2593 [inline] __se_sys_sendmsg net/socket.c:2591 [inline] __x64_sys_sendmsg+0x36b/0x540 net/socket.c:2591 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was stored to memory at: tun_set_link_ksettings+0x37/0x60 drivers/net/tun.c:3553 ethtool_set_link_ksettings+0x600/0x690 net/ethtool/ioctl.c:609 __dev_ethtool net/ethtool/ioctl.c:3024 [inline] dev_ethtool+0x1db9/0x2a70 net/ethtool/ioctl.c:3078 dev_ioctl+0xb07/0x1270 net/core/dev_ioctl.c:524 sock_do_ioctl+0x295/0x540 net/socket.c:1213 sock_ioctl+0x729/0xd90 net/socket.c:1316 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl+0x222/0x400 fs/ioctl.c:856 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Local variable link_ksettings created at: ethtool_set_link_ksettings+0x54/0x690 net/ethtool/ioctl.c:577 __dev_ethtool net/ethtool/ioctl.c:3024 [inline] dev_ethtool+0x1db9/0x2a70 net/ethtool/ioctl.c:3078 Fixes: 012ce4dd3102 ("ethtool: Extend link modes settings uAPI with lanes") Reported-and-tested-by: syzbot+ef6edd9f1baaa54d6235@syzkaller.appspotmail.com Link: https://lore.kernel.org/netdev/0000000000004bb41105fa70f361@google.com/ Reviewed-by: Danielle Ratson Signed-off-by: Ido Schimmel Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 59adc4e6e9ee..6bb778e10461 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -574,8 +574,8 @@ static int ethtool_get_link_ksettings(struct net_device *dev, static int ethtool_set_link_ksettings(struct net_device *dev, void __user *useraddr) { + struct ethtool_link_ksettings link_ksettings = {}; int err; - struct ethtool_link_ksettings link_ksettings; ASSERT_RTNL(); From 4a54903ff68ddb33b6463c94b4eb37fc584ef760 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 2 May 2023 11:35:36 -0700 Subject: [PATCH 50/69] ionic: catch failure from devlink_alloc Add a check for NULL on the alloc return. If devlink_alloc() fails and we try to use devlink_priv() on the NULL return, the kernel gets very unhappy and panics. With this fix, the driver load will still fail, but at least it won't panic the kernel. Fixes: df69ba43217d ("ionic: Add basic framework for IONIC Network device driver") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index e6ff757895ab..4ec66a6be073 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -61,6 +61,8 @@ struct ionic *ionic_devlink_alloc(struct device *dev) struct devlink *dl; dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev); + if (!dl) + return NULL; return devlink_priv(dl); } From ec788f7e96ad4cd30338a09d466d9368acd73c26 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 2 May 2023 13:27:52 -0700 Subject: [PATCH 51/69] pds_core: remove CONFIG_DEBUG_FS from makefile This cruft from previous drafts should have been removed when the code was updated to not use the old style dummy helpers. Fixes: 55435ea7729a ("pds_core: initial framework for pds_core PF driver") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pds_core/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/Makefile b/drivers/net/ethernet/amd/pds_core/Makefile index 0abc33ce826c..8239742e681f 100644 --- a/drivers/net/ethernet/amd/pds_core/Makefile +++ b/drivers/net/ethernet/amd/pds_core/Makefile @@ -9,6 +9,5 @@ pds_core-y := main.o \ dev.o \ adminq.o \ core.o \ + debugfs.o \ fw.o - -pds_core-$(CONFIG_DEBUG_FS) += debugfs.o From 1eeb807ffd8da5180f8d64d89ce332876acb5dbd Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 2 May 2023 13:40:32 -0700 Subject: [PATCH 52/69] pds_core: add AUXILIARY_BUS and NET_DEVLINK to Kconfig Add selecting of AUXILIARY_BUS and NET_DEVLINK to the pds_core Kconfig. Link: https://lore.kernel.org/netdev/ZE%2FduNH3lBLreNkJ@corigine.com/ Fixes: ddbcb22055d1 ("pds_core: Kconfig and pds_core.rst") Suggested-by: Simon Horman Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 235fcacef5c5..f8cc8925161c 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -189,6 +189,8 @@ config AMD_XGBE_HAVE_ECC config PDS_CORE tristate "AMD/Pensando Data Systems Core Device Support" depends on 64BIT && PCI + select AUXILIARY_BUS + select NET_DEVLINK help This enables the support for the AMD/Pensando Core device family of adapters. More specific information on this driver can be From 3ce29c17dc847bf4245e16aad78a7617afa96297 Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Tue, 2 May 2023 08:48:06 -0700 Subject: [PATCH 53/69] igc: read before write to SRRCTL register igc_configure_rx_ring() function will be called as part of XDP program setup. If Rx hardware timestamp is enabled prio to XDP program setup, this timestamp enablement will be overwritten when buffer size is written into SRRCTL register. Thus, this commit read the register value before write to SRRCTL register. This commit is tested by using xdp_hw_metadata bpf selftest tool. The tool enables Rx hardware timestamp and then attach XDP program to igc driver. It will display hardware timestamp of UDP packet with port number 9092. Below are detail of test steps and results. Command on DUT: sudo ./xdp_hw_metadata Command on Link Partner: echo -n skb | nc -u -q1 9092 Result before this patch: skb hwtstamp is not found! Result after this patch: found skb hwtstamp = 1677800973.642836757 Optionally, read PHC to confirm the values obtained are almost the same: Command: sudo ./testptp -d /dev/ptp0 -g Result: clock time: 1677800973.913598978 or Fri Mar 3 07:49:33 2023 Fixes: fc9df2a0b520 ("igc: Enable RX via AF_XDP zero-copy") Cc: # 5.14+ Signed-off-by: Song Yoong Siang Reviewed-by: Jacob Keller Reviewed-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Naama Meir Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_base.h | 11 ++++++++--- drivers/net/ethernet/intel/igc/igc_main.c | 7 +++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index 7a992befca24..9f3827eda157 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -87,8 +87,13 @@ union igc_adv_rx_desc { #define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */ /* SRRCTL bit definitions */ -#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ -#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ -#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define IGC_SRRCTL_BSIZEPKT_MASK GENMASK(6, 0) +#define IGC_SRRCTL_BSIZEPKT(x) FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \ + (x) / 1024) /* in 1 KB resolution */ +#define IGC_SRRCTL_BSIZEHDR_MASK GENMASK(13, 8) +#define IGC_SRRCTL_BSIZEHDR(x) FIELD_PREP(IGC_SRRCTL_BSIZEHDR_MASK, \ + (x) / 64) /* in 64 bytes resolution */ +#define IGC_SRRCTL_DESCTYPE_MASK GENMASK(27, 25) +#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF FIELD_PREP(IGC_SRRCTL_DESCTYPE_MASK, 1) #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba49728be919..1c4676882082 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -640,8 +640,11 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, else buf_size = IGC_RXBUFFER_2048; - srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT; + srrctl = rd32(IGC_SRRCTL(reg_idx)); + srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK | + IGC_SRRCTL_DESCTYPE_MASK); + srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN); + srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size); srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; wr32(IGC_SRRCTL(reg_idx), srrctl); From 6a341729fb31b4c5df9f74f24b4b1c98410c9b87 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 1 May 2023 13:28:57 -0700 Subject: [PATCH 54/69] af_packet: Don't send zero-byte data in packet_sendmsg_spkt(). syzkaller reported a warning below [0]. We can reproduce it by sending 0-byte data from the (AF_PACKET, SOCK_PACKET) socket via some devices whose dev->hard_header_len is 0. struct sockaddr_pkt addr = { .spkt_family = AF_PACKET, .spkt_device = "tun0", }; int fd; fd = socket(AF_PACKET, SOCK_PACKET, 0); sendto(fd, NULL, 0, 0, (struct sockaddr *)&addr, sizeof(addr)); We have a similar fix for the (AF_PACKET, SOCK_RAW) socket as commit dc633700f00f ("net/af_packet: check len when min_header_len equals to 0"). Let's add the same test for the SOCK_PACKET socket. [0]: skb_assert_len WARNING: CPU: 1 PID: 19945 at include/linux/skbuff.h:2552 skb_assert_len include/linux/skbuff.h:2552 [inline] WARNING: CPU: 1 PID: 19945 at include/linux/skbuff.h:2552 __dev_queue_xmit+0x1f26/0x31d0 net/core/dev.c:4159 Modules linked in: CPU: 1 PID: 19945 Comm: syz-executor.0 Not tainted 6.3.0-rc7-02330-gca6270c12e20 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:skb_assert_len include/linux/skbuff.h:2552 [inline] RIP: 0010:__dev_queue_xmit+0x1f26/0x31d0 net/core/dev.c:4159 Code: 89 de e8 1d a2 85 fd 84 db 75 21 e8 64 a9 85 fd 48 c7 c6 80 2a 1f 86 48 c7 c7 c0 06 1f 86 c6 05 23 cf 27 04 01 e8 fa ee 56 fd <0f> 0b e8 43 a9 85 fd 0f b6 1d 0f cf 27 04 31 ff 89 de e8 e3 a1 85 RSP: 0018:ffff8880217af6e0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90001133000 RDX: 0000000000040000 RSI: ffffffff81186922 RDI: 0000000000000001 RBP: ffff8880217af8b0 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888030045640 R13: ffff8880300456b0 R14: ffff888030045650 R15: ffff888030045718 FS: 00007fc5864da640(0000) GS:ffff88806cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020005740 CR3: 000000003f856003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: dev_queue_xmit include/linux/netdevice.h:3085 [inline] packet_sendmsg_spkt+0xc4b/0x1230 net/packet/af_packet.c:2066 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0x1b4/0x200 net/socket.c:747 ____sys_sendmsg+0x331/0x970 net/socket.c:2503 ___sys_sendmsg+0x11d/0x1c0 net/socket.c:2557 __sys_sendmmsg+0x18c/0x430 net/socket.c:2643 __do_sys_sendmmsg net/socket.c:2672 [inline] __se_sys_sendmmsg net/socket.c:2669 [inline] __x64_sys_sendmmsg+0x9c/0x100 net/socket.c:2669 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3c/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc58791de5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fc5864d9cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007fc58791de5d RDX: 0000000000000001 RSI: 0000000020005740 RDI: 0000000000000004 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fc58797e530 R15: 0000000000000000 ---[ end trace 0000000000000000 ]--- skb len=0 headroom=16 headlen=0 tailroom=304 mac=(16,0) net=(16,-1) trans=-1 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0000 pkttype=0 iif=0 dev name=sit0 feat=0x00000006401d7869 sk family=17 type=10 proto=0 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6080c0db0814..640d94e34635 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2033,7 +2033,7 @@ retry: goto retry; } - if (!dev_validate_header(dev, skb->data, len)) { + if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } From f057b63bc11d86a98176de31b437e46789f44d8f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 3 May 2023 12:00:18 +0200 Subject: [PATCH 55/69] netfilter: nf_tables: fix ct untracked match breakage "ct untracked" no longer works properly due to erroneous NFT_BREAK. We have to check ctinfo enum first. Fixes: d9e789147605 ("netfilter: nf_tables: avoid retpoline overhead for some ct expression calls") Reported-by: Rvfg Link: https://marc.info/?l=netfilter&m=168294996212038&w=2 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct_fast.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c index 89983b0613fa..e684c8a91848 100644 --- a/net/netfilter/nft_ct_fast.c +++ b/net/netfilter/nft_ct_fast.c @@ -15,10 +15,6 @@ void nft_ct_get_fast_eval(const struct nft_expr *expr, unsigned int state; ct = nf_ct_get(pkt->skb, &ctinfo); - if (!ct) { - regs->verdict.code = NFT_BREAK; - return; - } switch (priv->key) { case NFT_CT_STATE: @@ -30,6 +26,16 @@ void nft_ct_get_fast_eval(const struct nft_expr *expr, state = NF_CT_STATE_INVALID_BIT; *dest = state; return; + default: + break; + } + + if (!ct) { + regs->verdict.code = NFT_BREAK; + return; + } + + switch (priv->key) { case NFT_CT_DIRECTION: nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; From d7385ba137711ea71527a605cac162610a621de8 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Thu, 4 May 2023 15:47:23 -0400 Subject: [PATCH 56/69] 9p: Remove INET dependency 9pfs can run over assorted transports, so it doesn't have an INET dependency. Drop it and remove the includes of linux/inet.h. NET_9P_FD/trans_fd.o builds without INET or UNIX and is usable over plain file descriptors. However, tcp and unix functionality is still built and would generate runtime failures if used. Add imply INET and UNIX to NET_9P_FD, so functionality is enabled by default but can still be explicitly disabled. This allows configuring 9pfs over Xen with INET and UNIX disabled. Signed-off-by: Jason Andryuk Signed-off-by: David S. Miller --- fs/9p/Kconfig | 2 +- fs/9p/vfs_addr.c | 1 - fs/9p/vfs_dentry.c | 1 - fs/9p/vfs_dir.c | 1 - fs/9p/vfs_file.c | 1 - fs/9p/vfs_inode.c | 1 - fs/9p/vfs_inode_dotl.c | 1 - fs/9p/vfs_super.c | 1 - net/9p/Kconfig | 2 ++ 9 files changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index d7bc93447c85..0c63df574ee7 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000)" - depends on INET && NET_9P + depends on NET_9P select NETFS_SUPPORT help If you say Y here, you will get experimental support for diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6f46d7e4c750..425956eb9fde 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 65fa2df5e49b..f16f73581634 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 3d74b04fe0de..52bf87934650 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 44c15eb2b908..367a851eaa82 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 1d523bec0a94..502ac74e4959 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 331ed60d8fcb..a7da49906d99 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 266c4693e20c..10449994a972 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/net/9p/Kconfig b/net/9p/Kconfig index deabbd376cb1..00ebce9e5a65 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -17,6 +17,8 @@ if NET_9P config NET_9P_FD default NET_9P + imply INET + imply UNIX tristate "9P FD Transport" help This builds support for transports over TCP, Unix sockets and From 37c218d8021e36e226add4bab93d071d30fe0704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Wed, 3 May 2023 00:09:46 +0300 Subject: [PATCH 57/69] net: dsa: mt7530: fix corrupt frames using trgmii on 40 MHz XTAL MT7621 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The multi-chip module MT7530 switch with a 40 MHz oscillator on the MT7621AT, MT7621DAT, and MT7621ST SoCs forwards corrupt frames using trgmii. This is caused by the assumption that MT7621 SoCs have got 150 MHz PLL, hence using the ncpo1 value, 0x0780. My testing shows this value works on Unielec U7621-06, Bartel's testing shows it won't work on Hi-Link HLK-MT7621A and Netgear WAC104. All devices tested have got 40 MHz oscillators. Using the value for 125 MHz PLL, 0x0640, works on all boards at hand. The definitions for 125 MHz PLL exist on the Banana Pi BPI-R2 BSP source code whilst 150 MHz PLL don't. Forwarding frames using trgmii on the MCM MT7530 switch with a 25 MHz oscillator on the said MT7621 SoCs works fine because the ncpo1 value defined for it is for 125 MHz PLL. Change the 150 MHz PLL comment to 125 MHz PLL, and use the 125 MHz PLL ncpo1 values for both oscillator frequencies. Link: https://github.com/BPI-SINOVOIP/BPI-R2-bsp/blob/81d24bbce7d99524d0771a8bdb2d6663e4eb4faa/u-boot-mt/drivers/net/rt2880_eth.c#L2195 Fixes: 7ef6f6f8d237 ("net: dsa: mt7530: Add MT7621 TRGMII mode support") Tested-by: Bartel Eerdekens Signed-off-by: Arınç ÜNAL Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c680873819b0..7d9f9563dbda 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -426,9 +426,9 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) else ssc_delta = 0x87; if (priv->id == ID_MT7621) { - /* PLL frequency: 150MHz: 1.2GBit */ + /* PLL frequency: 125MHz: 1.0GBit */ if (xtal == HWTRAP_XTAL_40MHZ) - ncpo1 = 0x0780; + ncpo1 = 0x0640; if (xtal == HWTRAP_XTAL_25MHZ) ncpo1 = 0x0a00; } else { /* PLL frequency: 250MHz: 2.0Gbit */ From 120a56b01beed51ab5956a734adcfd2760307107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Wed, 3 May 2023 00:09:47 +0300 Subject: [PATCH 58/69] net: dsa: mt7530: fix network connectivity with multiple CPU ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On mt753x_cpu_port_enable() there's code that enables flooding for the CPU port only. Since mt753x_cpu_port_enable() runs twice when both CPU ports are enabled, port 6 becomes the only port to forward the frames to. But port 5 is the active port, so no frames received from the user ports will be forwarded to port 5 which breaks network connectivity. Every bit of the BC_FFP, UNM_FFP, and UNU_FFP bits represents a port. Fix this issue by setting the bit that corresponds to the CPU port without overwriting the other bits. Clear the bits beforehand only for the MT7531 switch. According to the documents MT7621 Giga Switch Programming Guide v0.3 and MT7531 Reference Manual for Development Board v1.0, after reset, the BC_FFP, UNM_FFP, and UNU_FFP bits are set to 1 for MT7531, 0 for MT7530. The commit 5e5502e012b8 ("net: dsa: mt7530: fix roaming from DSA user ports") silently changed the method to set the bits on the MT7530_MFC. Instead of clearing the relevant bits before mt7530_cpu_port_enable() which runs under a for loop, the commit started doing it on mt7530_cpu_port_enable(). Back then, this didn't really matter as only a single CPU port could be used since the CPU port number was hardcoded. The driver was later changed with commit 1f9a6abecf53 ("net: dsa: mt7530: get cpu-port via dp->cpu_dp instead of constant") to retrieve the CPU port via dp->cpu_dp. With that, this silent change became an issue for when using multiple CPU ports. Fixes: 5e5502e012b8 ("net: dsa: mt7530: fix roaming from DSA user ports") Signed-off-by: Arınç ÜNAL Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 7d9f9563dbda..9bc54e1348cb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1002,9 +1002,9 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) mt7530_write(priv, MT7530_PVC_P(port), PORT_SPEC_TAG); - /* Disable flooding by default */ - mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK, - BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port))); + /* Enable flooding on the CPU port */ + mt7530_set(priv, MT7530_MFC, BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | + UNU_FFP(BIT(port))); /* Set CPU port number */ if (priv->id == ID_MT7621) @@ -2367,6 +2367,10 @@ mt7531_setup_common(struct dsa_switch *ds) /* Enable and reset MIB counters */ mt7530_mib_reset(ds); + /* Disable flooding on all ports */ + mt7530_clear(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | + UNU_FFP_MASK); + for (i = 0; i < MT7530_NUM_PORTS; i++) { /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, From 9f699b71c2f31c51bd1483a20e1c8ddc5986a8c9 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 3 May 2023 08:39:35 -0700 Subject: [PATCH 59/69] ice: block LAN in case of VF to VF offload VF to VF traffic shouldn't go outside. To enforce it, set only the loopback enable bit in case of all ingress type rules added via the tc tool. Fixes: 0d08a441fb1a ("ice: ndo_setup_tc implementation for PF") Reported-by: Sujai Buvaneswaran Signed-off-by: Michal Swiatkowski Tested-by: George Kuruvinakunnel Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 76f29a5bf8d7..d1a31f236d26 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -693,17 +693,18 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) * results into order of switch rule evaluation. */ rule_info.priority = 7; + rule_info.flags_info.act_valid = true; if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; rule_info.rx = true; + rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else { rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.rx = false; rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; - rule_info.flags_info.act_valid = true; } /* specify the cookie as filter_rule_id */ From f8bb5104394560e29017c25bcade4c6b7aabd108 Mon Sep 17 00:00:00 2001 From: Wenliang Wang Date: Thu, 4 May 2023 10:27:06 +0800 Subject: [PATCH 60/69] virtio_net: suppress cpu stall when free_unused_bufs For multi-queue and large ring-size use case, the following error occurred when free_unused_bufs: rcu: INFO: rcu_sched self-detected stall on CPU. Fixes: 986a4f4d452d ("virtio_net: multiqueue support") Signed-off-by: Wenliang Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8d8038538fc4..a12ae26db0e2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3560,12 +3560,14 @@ static void free_unused_bufs(struct virtnet_info *vi) struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_sq_free_unused_buf(vq, buf); + cond_resched(); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_rq_free_unused_buf(vq, buf); + cond_resched(); } } From c00ce5470a8adeaf681865836085f72633c00a7e Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Thu, 4 May 2023 08:28:01 +0100 Subject: [PATCH 61/69] sfc: Add back mailing list We used to have a mailing list in the MAINTAINERS file, but removed this when we became part of Xilinx as it stopped working. Now inside AMD we have the list again. Add it back so patches will be seen by all sfc developers. Signed-off-by: Martin Habets Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ebd26b3ca90e..dcab6b41ad8d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18987,6 +18987,7 @@ SFC NETWORK DRIVER M: Edward Cree M: Martin Habets L: netdev@vger.kernel.org +L: linux-net-drivers@amd.com S: Supported F: Documentation/networking/devlink/sfc.rst F: drivers/net/ethernet/sfc/ From 299efdc2380aac588557f4d0b2ce7bee05bd0cf2 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 4 May 2023 16:03:59 +0800 Subject: [PATCH 62/69] net: enetc: check the index of the SFI rather than the handle We should check whether the current SFI (Stream Filter Instance) table is full before creating a new SFI entry. However, the previous logic checks the handle by mistake and might lead to unpredictable behavior. Fixes: 888ae5a3952b ("net: enetc: add tc flower psfp offload driver") Signed-off-by: Wei Fang Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 130ebf6853e6..83c27bbbc6ed 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -1247,7 +1247,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, int index; index = enetc_get_free_index(priv); - if (sfi->handle < 0) { + if (index < 0) { NL_SET_ERR_MSG_MOD(extack, "No Stream Filter resource!"); err = -ENOSPC; goto free_fmi; From cb9e6e584d58420df182102674e636fb841dae4c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 May 2023 11:52:49 +0200 Subject: [PATCH 63/69] bonding: add xdp_features support Introduce xdp_features support for bonding driver according to the slave devices attached to the master one. xdp_features is required whenever we want to xdp_redirect traffic into a bond device and then into selected slaves attached to it. Reviewed-by: Simon Horman Acked-by: Jay Vosburgh Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Lorenzo Bianconi Reviewed-by: Jussi Maki Tested-by: Jussi Maki Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 29 +++++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 2 ++ include/net/bonding.h | 1 + 3 files changed, 32 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 710548dbd0c1..3fed888629f7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1789,6 +1789,26 @@ static void bond_ether_setup(struct net_device *bond_dev) bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; } +void bond_xdp_set_features(struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + xdp_features_t val = NETDEV_XDP_ACT_MASK; + struct list_head *iter; + struct slave *slave; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) { + xdp_clear_features_flag(bond_dev); + return; + } + + bond_for_each_slave(bond, slave, iter) + val &= slave->dev->xdp_features; + + xdp_set_features_flag(bond_dev, val); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) @@ -2236,6 +2256,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bpf_prog_inc(bond->xdp_prog); } + bond_xdp_set_features(bond_dev); + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); @@ -2483,6 +2505,7 @@ static int __bond_release_one(struct net_device *bond_dev, if (!netif_is_bond_master(slave_dev)) slave_dev->priv_flags &= ~IFF_BONDING; + bond_xdp_set_features(bond_dev); kobject_put(&slave->kobj); return 0; @@ -3930,6 +3953,9 @@ static int bond_slave_netdev_event(unsigned long event, /* Propagate to master device */ call_netdevice_notifiers(event, slave->bond->dev); break; + case NETDEV_XDP_FEAT_CHANGE: + bond_xdp_set_features(bond_dev); + break; default: break; } @@ -5874,6 +5900,9 @@ void bond_setup(struct net_device *bond_dev) if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) bond_dev->features |= BOND_XFRM_FEATURES; #endif /* CONFIG_XFRM_OFFLOAD */ + + if (bond_xdp_check(bond)) + bond_dev->xdp_features = NETDEV_XDP_ACT_MASK; } /* Destroy a bonding device. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index f71d5517f829..0498fc6731f8 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -877,6 +877,8 @@ static int bond_option_mode_set(struct bonding *bond, netdev_update_features(bond->dev); } + bond_xdp_set_features(bond->dev); + return 0; } diff --git a/include/net/bonding.h b/include/net/bonding.h index c3843239517d..a60a24923b55 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -659,6 +659,7 @@ void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); +void bond_xdp_set_features(struct net_device *bond_dev); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); From 26312c685ae0bca61e06ac75ee158b1e69546415 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Thu, 4 May 2023 10:35:17 -0500 Subject: [PATCH 64/69] net: fec: correct the counting of XDP sent frames In the current xdp_xmit implementation, if any single frame fails to transmit due to insufficient buffer descriptors, the function nevertheless reports success in sending all frames. This results in erroneously indicating that frames were transmitted when in fact they were dropped. This patch fixes the issue by ensureing the return value properly indicates the actual number of frames successfully transmitted, rather than potentially reporting success for all frames when some could not transmit. Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") Signed-off-by: Gagandeep Singh Signed-off-by: Shenwei Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 160c1b3525f5..42ec6ca3bf03 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3798,7 +3798,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free < MAX_SKB_FRAGS + 1) { netdev_err(fep->netdev, "NOT enough BD for SG!\n"); - return NETDEV_TX_OK; + xdp_return_frame(frame); + return NETDEV_TX_BUSY; } /* Fill in a Tx ring entry */ @@ -3856,6 +3857,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev, struct fec_enet_private *fep = netdev_priv(dev); struct fec_enet_priv_tx_q *txq; int cpu = smp_processor_id(); + unsigned int sent_frames = 0; struct netdev_queue *nq; unsigned int queue; int i; @@ -3866,8 +3868,11 @@ static int fec_enet_xdp_xmit(struct net_device *dev, __netif_tx_lock(nq, cpu); - for (i = 0; i < num_frames; i++) - fec_enet_txq_xmit_frame(fep, txq, frames[i]); + for (i = 0; i < num_frames; i++) { + if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) != 0) + break; + sent_frames++; + } /* Make sure the update to bdp and tx_skbuff are performed. */ wmb(); @@ -3877,7 +3882,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev, __netif_tx_unlock(nq); - return num_frames; + return sent_frames; } static const struct net_device_ops fec_netdev_ops = { From dd4f6bbfa646f258e5bcdfac57a5c413d687f588 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 4 May 2023 20:16:14 +0200 Subject: [PATCH 65/69] net/sched: flower: fix filter idr initialization The cited commit moved idr initialization too early in fl_change() which allows concurrent users to access the filter that is still being initialized and is in inconsistent state, which, in turn, can cause NULL pointer dereference [0]. Since there is no obvious way to fix the ordering without reverting the whole cited commit, alternative approach taken to first insert NULL pointer into idr in order to allocate the handle but still cause fl_get() to return NULL and prevent concurrent users from seeing the filter while providing miss-to-action infrastructure with valid handle id early in fl_change(). [ 152.434728] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN [ 152.436163] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 152.437269] CPU: 4 PID: 3877 Comm: tc Not tainted 6.3.0-rc4+ #5 [ 152.438110] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 152.439644] RIP: 0010:fl_dump_key+0x8b/0x1d10 [cls_flower] [ 152.440461] Code: 01 f2 02 f2 c7 40 08 04 f2 04 f2 c7 40 0c 04 f3 f3 f3 65 48 8b 04 25 28 00 00 00 48 89 84 24 00 01 00 00 48 89 c8 48 c1 e8 03 <0f> b6 04 10 84 c0 74 08 3c 03 0f 8e 98 19 00 00 8b 13 85 d2 74 57 [ 152.442885] RSP: 0018:ffff88817a28f158 EFLAGS: 00010246 [ 152.443851] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 152.444826] RDX: dffffc0000000000 RSI: ffffffff8500ae80 RDI: ffff88810a987900 [ 152.445791] RBP: ffff888179d88240 R08: ffff888179d8845c R09: ffff888179d88240 [ 152.446780] R10: ffffed102f451e48 R11: 00000000fffffff2 R12: ffff88810a987900 [ 152.447741] R13: ffffffff8500ae80 R14: ffff88810a987900 R15: ffff888149b3c738 [ 152.448756] FS: 00007f5eb2a34800(0000) GS:ffff88881ec00000(0000) knlGS:0000000000000000 [ 152.449888] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 152.450685] CR2: 000000000046ad19 CR3: 000000010b0bd006 CR4: 0000000000370ea0 [ 152.451641] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 152.452628] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 152.453588] Call Trace: [ 152.454032] [ 152.454447] ? netlink_sendmsg+0x7a1/0xcb0 [ 152.455109] ? sock_sendmsg+0xc5/0x190 [ 152.455689] ? ____sys_sendmsg+0x535/0x6b0 [ 152.456320] ? ___sys_sendmsg+0xeb/0x170 [ 152.456916] ? do_syscall_64+0x3d/0x90 [ 152.457529] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 152.458321] ? ___sys_sendmsg+0xeb/0x170 [ 152.458958] ? __sys_sendmsg+0xb5/0x140 [ 152.459564] ? do_syscall_64+0x3d/0x90 [ 152.460122] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 152.460852] ? fl_dump_key_options.part.0+0xea0/0xea0 [cls_flower] [ 152.461710] ? _raw_spin_lock+0x7a/0xd0 [ 152.462299] ? _raw_read_lock_irq+0x30/0x30 [ 152.462924] ? nla_put+0x15e/0x1c0 [ 152.463480] fl_dump+0x228/0x650 [cls_flower] [ 152.464112] ? fl_tmplt_dump+0x210/0x210 [cls_flower] [ 152.464854] ? __kmem_cache_alloc_node+0x1a7/0x330 [ 152.465592] ? nla_put+0x15e/0x1c0 [ 152.466160] tcf_fill_node+0x515/0x9a0 [ 152.466766] ? tc_setup_offload_action+0xf0/0xf0 [ 152.467463] ? __alloc_skb+0x13c/0x2a0 [ 152.468067] ? __build_skb_around+0x330/0x330 [ 152.468814] ? fl_get+0x107/0x1a0 [cls_flower] [ 152.469503] tc_del_tfilter+0x718/0x1330 [ 152.470115] ? is_bpf_text_address+0xa/0x20 [ 152.470765] ? tc_ctl_chain+0xee0/0xee0 [ 152.471335] ? __kernel_text_address+0xe/0x30 [ 152.471948] ? unwind_get_return_address+0x56/0xa0 [ 152.472639] ? __thaw_task+0x150/0x150 [ 152.473218] ? arch_stack_walk+0x98/0xf0 [ 152.473839] ? __stack_depot_save+0x35/0x4c0 [ 152.474501] ? stack_trace_save+0x91/0xc0 [ 152.475119] ? security_capable+0x51/0x90 [ 152.475741] rtnetlink_rcv_msg+0x2c1/0x9d0 [ 152.476387] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 152.477042] ? __sys_sendmsg+0xb5/0x140 [ 152.477664] ? do_syscall_64+0x3d/0x90 [ 152.478255] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 152.479010] ? __stack_depot_save+0x35/0x4c0 [ 152.479679] ? __stack_depot_save+0x35/0x4c0 [ 152.480346] netlink_rcv_skb+0x12c/0x360 [ 152.480929] ? rtnl_calcit.isra.0+0x2b0/0x2b0 [ 152.481517] ? do_syscall_64+0x3d/0x90 [ 152.482061] ? netlink_ack+0x1550/0x1550 [ 152.482612] ? rhashtable_walk_peek+0x170/0x170 [ 152.483262] ? kmem_cache_alloc_node+0x1af/0x390 [ 152.483875] ? _copy_from_iter+0x3d6/0xc70 [ 152.484528] netlink_unicast+0x553/0x790 [ 152.485168] ? netlink_attachskb+0x6a0/0x6a0 [ 152.485848] ? unwind_next_frame+0x11cc/0x1a10 [ 152.486538] ? arch_stack_walk+0x61/0xf0 [ 152.487169] netlink_sendmsg+0x7a1/0xcb0 [ 152.487799] ? netlink_unicast+0x790/0x790 [ 152.488355] ? iovec_from_user.part.0+0x4d/0x220 [ 152.488990] ? _raw_spin_lock+0x7a/0xd0 [ 152.489598] ? netlink_unicast+0x790/0x790 [ 152.490236] sock_sendmsg+0xc5/0x190 [ 152.490796] ____sys_sendmsg+0x535/0x6b0 [ 152.491394] ? import_iovec+0x7/0x10 [ 152.491964] ? kernel_sendmsg+0x30/0x30 [ 152.492561] ? __copy_msghdr+0x3c0/0x3c0 [ 152.493160] ? do_syscall_64+0x3d/0x90 [ 152.493706] ___sys_sendmsg+0xeb/0x170 [ 152.494283] ? may_open_dev+0xd0/0xd0 [ 152.494858] ? copy_msghdr_from_user+0x110/0x110 [ 152.495541] ? __handle_mm_fault+0x2678/0x4ad0 [ 152.496205] ? copy_page_range+0x2360/0x2360 [ 152.496862] ? __fget_light+0x57/0x520 [ 152.497449] ? mas_find+0x1c0/0x1c0 [ 152.498026] ? sockfd_lookup_light+0x1a/0x140 [ 152.498703] __sys_sendmsg+0xb5/0x140 [ 152.499306] ? __sys_sendmsg_sock+0x20/0x20 [ 152.499951] ? do_user_addr_fault+0x369/0xd80 [ 152.500595] do_syscall_64+0x3d/0x90 [ 152.501185] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 152.501917] RIP: 0033:0x7f5eb294f887 [ 152.502494] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 152.505008] RSP: 002b:00007ffd2c708f78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 152.506152] RAX: ffffffffffffffda RBX: 00000000642d9472 RCX: 00007f5eb294f887 [ 152.507134] RDX: 0000000000000000 RSI: 00007ffd2c708fe0 RDI: 0000000000000003 [ 152.508113] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 152.509119] R10: 00007f5eb2808708 R11: 0000000000000246 R12: 0000000000000001 [ 152.510068] R13: 0000000000000000 R14: 00007ffd2c70d1b8 R15: 0000000000485400 [ 152.511031] [ 152.511444] Modules linked in: cls_flower sch_ingress openvswitch nsh mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay zram zsmalloc fuse [last unloaded: mlx5_core] [ 152.515720] ---[ end trace 0000000000000000 ]--- Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Vlad Buslov Reviewed-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6ab6aadc07b8..4dc3a9007f30 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2210,10 +2210,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, spin_lock(&tp->lock); if (!handle) { handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + err = idr_alloc_u32(&head->handle_idr, NULL, &handle, INT_MAX, GFP_ATOMIC); } else { - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + err = idr_alloc_u32(&head->handle_idr, NULL, &handle, handle, GFP_ATOMIC); /* Filter with specified handle was concurrently @@ -2378,7 +2378,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, rcu_read_lock(); idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) { /* don't return filters that are being deleted */ - if (!refcount_inc_not_zero(&f->refcnt)) + if (!f || !refcount_inc_not_zero(&f->refcnt)) continue; rcu_read_unlock(); From 5110f3ff6d3c986df9575c8da86630578b7f0846 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 4 May 2023 20:16:15 +0200 Subject: [PATCH 66/69] Revert "net/sched: flower: Fix wrong handle assignment during filter change" This reverts commit 32eff6bacec2cb574677c15378169a9fa30043ef. Superseded by the following commit in this series. Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 4dc3a9007f30..ac4f344c52e0 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2231,8 +2231,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(fnew); goto errout_tb; } - fnew->handle = handle; } + fnew->handle = handle; err = tcf_exts_init_ex(&fnew->exts, net, TCA_FLOWER_ACT, 0, tp, handle, !tc_skip_hw(fnew->flags)); From fd741f0d9f702c193b2b44225c004f8c5d5be163 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 4 May 2023 20:16:16 +0200 Subject: [PATCH 67/69] net/sched: flower: fix error handler on replace When replacing a filter (i.e. 'fold' pointer is not NULL) the insertion of new filter to idr is postponed until later in code since handle is already provided by the user. However, the error handling code in fl_change() always assumes that the new filter had been inserted into idr. If error handler is reached when replacing existing filter it may remove it from idr therefore making it unreachable for delete or dump afterwards. Fix the issue by verifying that 'fold' argument wasn't provided by caller before calling idr_remove(). Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Vlad Buslov Reviewed-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ac4f344c52e0..9dbc43388e57 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2339,7 +2339,8 @@ errout_hw: errout_mask: fl_mask_put(head, fnew->mask); errout_idr: - idr_remove(&head->handle_idr, fnew->handle); + if (!fold) + idr_remove(&head->handle_idr, fnew->handle); __fl_put(fnew); errout_tb: kfree(tb); From 1e76f42779d6a2e45107b34d79d86a57b8077630 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 4 May 2023 13:44:59 -0700 Subject: [PATCH 68/69] pds_core: fix mutex double unlock in error path Fix a double unlock in an error handling path by unlocking as soon as the error is seen and removing unlocks in the error cleanup path. Link: https://lore.kernel.org/kernel-janitors/209a09f6-5ec6-40c7-a5ec-6260d8f54d25@kili.mountain/ Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces") Reported-by: Dan Carpenter Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pds_core/main.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index e2d14b1ca471..672757932246 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -244,11 +244,16 @@ static int pdsc_init_pf(struct pdsc *pdsc) set_bit(PDSC_S_FW_DEAD, &pdsc->state); err = pdsc_setup(pdsc, PDSC_SETUP_INIT); - if (err) + if (err) { + mutex_unlock(&pdsc->config_lock); goto err_out_unmap_bars; + } + err = pdsc_start(pdsc); - if (err) + if (err) { + mutex_unlock(&pdsc->config_lock); goto err_out_teardown; + } mutex_unlock(&pdsc->config_lock); @@ -257,13 +262,15 @@ static int pdsc_init_pf(struct pdsc *pdsc) err = devl_params_register(dl, pdsc_dl_params, ARRAY_SIZE(pdsc_dl_params)); if (err) { + devl_unlock(dl); dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n", ERR_PTR(err)); - goto err_out_unlock_dl; + goto err_out_stop; } hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc); if (IS_ERR(hr)) { + devl_unlock(dl); dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr); err = PTR_ERR(hr); goto err_out_unreg_params; @@ -279,15 +286,13 @@ static int pdsc_init_pf(struct pdsc *pdsc) return 0; err_out_unreg_params: - devl_params_unregister(dl, pdsc_dl_params, - ARRAY_SIZE(pdsc_dl_params)); -err_out_unlock_dl: - devl_unlock(dl); + devlink_params_unregister(dl, pdsc_dl_params, + ARRAY_SIZE(pdsc_dl_params)); +err_out_stop: pdsc_stop(pdsc); err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - mutex_unlock(&pdsc->config_lock); del_timer_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); From 93e0401e0fc0c54b0ac05b687cd135c2ac38187c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 4 May 2023 16:07:27 -0700 Subject: [PATCH 69/69] net: bcmgenet: Remove phy_stop() from bcmgenet_netif_stop() The call to phy_stop() races with the later call to phy_disconnect(), resulting in concurrent phy_suspend() calls being run from different CPUs. The final call to phy_disconnect() ensures that the PHY is stopped and suspended, too. Fixes: c96e731c93ff ("net: bcmgenet: connect and disconnect from the PHY state machine") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d937daa8ee88..f28ffc31df22 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3465,7 +3465,6 @@ static void bcmgenet_netif_stop(struct net_device *dev) /* Disable MAC transmit. TX DMA disabled must be done before this */ umac_enable_set(priv, CMD_TX_EN, false); - phy_stop(dev->phydev); bcmgenet_disable_rx_napi(priv); bcmgenet_intr_disable(priv);