Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
ice: fix AF_XDP ZC timeout and concurrency issues

Maciej Fijalkowski says:

Changes included in this patchset address an issue that customer has
been facing when AF_XDP ZC Tx sockets were used in combination with flow
control and regular Tx traffic.

After executing:
ethtool --set-priv-flags $dev link-down-on-close on
ethtool -A $dev rx on tx on

launching multiple ZC Tx sockets on $dev + pinging remote interface (so
that regular Tx traffic is present) and then going through down/up of
$dev, Tx timeout occurred and then most of the time ice driver was unable
to recover from that state.

These patches combined together solve the described above issue on
customer side. Main focus here is to forbid producing Tx descriptors when
either carrier is not yet initialized or process of bringing interface
down has already started.

v1: https://lore.kernel.org/netdev/20240708221416.625850-1-anthony.l.nguyen@intel.com/

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: xsk: fix txq interrupt mapping
  ice: add missing WRITE_ONCE when clearing ice_rx_ring::xdp_prog
  ice: improve updating ice_{t,r}x_ring::xsk_pool
  ice: toggle netif_carrier when setting up XSK pool
  ice: modify error handling when setting XSK pool in ndo_bpf
  ice: replace synchronize_rcu with synchronize_net
  ice: don't busy wait for Rx queue disable in ice_qp_dis()
  ice: respect netif readiness in AF_XDP ZC related ndo's
====================

Link: https://patch.msgid.link/20240729200716.681496-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-07-30 18:41:10 -07:00
commit 0bf50cead4
6 changed files with 135 additions and 90 deletions

View File

@ -765,18 +765,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi,
}
/**
* ice_xsk_pool - get XSK buffer pool bound to a ring
* ice_rx_xsk_pool - assign XSK buff pool to Rx ring
* @ring: Rx ring to use
*
* Returns a pointer to xsk_buff_pool structure if there is a buffer pool
* present, NULL otherwise.
* Sets XSK buff pool pointer on Rx ring.
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring)
{
struct ice_vsi *vsi = ring->vsi;
u16 qid = ring->q_index;
return ice_get_xp_from_qid(vsi, qid);
WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid));
}
/**
@ -801,7 +800,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
if (!ring)
return;
ring->xsk_pool = ice_get_xp_from_qid(vsi, qid);
WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid));
}
/**

View File

@ -536,7 +536,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
return err;
}
ring->xsk_pool = ice_xsk_pool(ring);
ice_rx_xsk_pool(ring);
if (ring->xsk_pool) {
xdp_rxq_info_unreg(&ring->xdp_rxq);
@ -597,7 +597,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
return 0;
}
ok = ice_alloc_rx_bufs_zc(ring, num_bufs);
ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs);
if (!ok) {
u16 pf_q = ring->vsi->rxq_map[ring->q_index];

View File

@ -2948,7 +2948,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
ice_for_each_rxq(vsi, i) {
struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
if (rx_ring->xsk_pool)
if (READ_ONCE(rx_ring->xsk_pool))
napi_schedule(&rx_ring->q_vector->napi);
}
}

View File

@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
if (rx_ring->vsi->type == ICE_VSI_PF)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
WRITE_ONCE(rx_ring->xdp_prog, NULL);
if (rx_ring->xsk_pool) {
kfree(rx_ring->xdp_buf);
rx_ring->xdp_buf = NULL;
@ -1521,10 +1521,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_tx_ring(tx_ring, q_vector->tx) {
struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool);
bool wd;
if (tx_ring->xsk_pool)
wd = ice_xmit_zc(tx_ring);
if (xsk_pool)
wd = ice_xmit_zc(tx_ring, xsk_pool);
else if (ice_ring_is_xdp(tx_ring))
wd = true;
else
@ -1550,6 +1551,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
budget_per_ring = budget;
ice_for_each_rx_ring(rx_ring, q_vector->rx) {
struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool);
int cleaned;
/* A dedicated path for zero-copy allows making a single
@ -1557,7 +1559,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
* ice_clean_rx_irq function and makes the codebase cleaner.
*/
cleaned = rx_ring->xsk_pool ?
ice_clean_rx_irq_zc(rx_ring, budget_per_ring) :
ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) :
ice_clean_rx_irq(rx_ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */

View File

@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
{
ice_clean_tx_ring(vsi->tx_rings[q_idx]);
if (ice_is_xdp_ena_vsi(vsi)) {
synchronize_rcu();
if (ice_is_xdp_ena_vsi(vsi))
ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
}
ice_clean_rx_ring(vsi->rx_rings[q_idx]);
}
@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
* ice_qvec_cfg_msix - Enable IRQ for given queue vector
* @vsi: the VSI that contains queue vector
* @q_vector: queue vector
* @qid: queue index
*/
static void
ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid)
{
u16 reg_idx = q_vector->reg_idx;
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
int q, _qid = qid;
ice_cfg_itr(hw, q_vector);
ice_for_each_tx_ring(tx_ring, q_vector->tx)
ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
q_vector->tx.itr_idx);
for (q = 0; q < q_vector->num_ring_tx; q++) {
ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx);
_qid++;
}
ice_for_each_rx_ring(rx_ring, q_vector->rx)
ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
q_vector->rx.itr_idx);
_qid = qid;
for (q = 0; q < q_vector->num_ring_rx; q++) {
ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx);
_qid++;
}
ice_flush(hw);
}
@ -164,6 +166,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
int timeout = 50;
int fail = 0;
int err;
if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
@ -180,15 +183,17 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
usleep_range(1000, 2000);
}
synchronize_net();
netif_carrier_off(vsi->netdev);
netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
ice_qvec_dis_irq(vsi, rx_ring, q_vector);
ice_qvec_toggle_napi(vsi, q_vector, false);
netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
if (err)
return err;
if (!fail)
fail = err;
if (ice_is_xdp_ena_vsi(vsi)) {
struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
@ -196,17 +201,15 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
&txq_meta);
if (err)
return err;
if (!fail)
fail = err;
}
err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
if (err)
return err;
ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false);
ice_qp_clean_rings(vsi, q_idx);
ice_qp_reset_stats(vsi, q_idx);
return 0;
return fail;
}
/**
@ -219,40 +222,48 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
{
struct ice_q_vector *q_vector;
int fail = 0;
bool link_up;
int err;
err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx);
if (err)
return err;
if (!fail)
fail = err;
if (ice_is_xdp_ena_vsi(vsi)) {
struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx);
if (err)
return err;
if (!fail)
fail = err;
ice_set_ring_xdp(xdp_ring);
ice_tx_xsk_pool(vsi, q_idx);
}
err = ice_vsi_cfg_single_rxq(vsi, q_idx);
if (err)
return err;
if (!fail)
fail = err;
q_vector = vsi->rx_rings[q_idx]->q_vector;
ice_qvec_cfg_msix(vsi, q_vector);
ice_qvec_cfg_msix(vsi, q_vector, q_idx);
err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
if (err)
return err;
if (!fail)
fail = err;
ice_qvec_toggle_napi(vsi, q_vector, true);
ice_qvec_ena_irq(vsi, q_vector);
netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
/* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */
synchronize_net();
ice_get_link_status(vsi->port_info, &link_up);
if (link_up) {
netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
netif_carrier_on(vsi->netdev);
}
clear_bit(ICE_CFG_BUSY, vsi->state);
return 0;
return fail;
}
/**
@ -459,6 +470,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
/**
* __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
* @rx_ring: Rx ring
* @xsk_pool: XSK buffer pool to pick buffers to be filled by HW
* @count: The number of buffers to allocate
*
* Place the @count of descriptors onto Rx ring. Handle the ring wrap
@ -467,7 +479,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
*
* Returns true if all allocations were successful, false if any fail.
*/
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool, u16 count)
{
u32 nb_buffs_extra = 0, nb_buffs = 0;
union ice_32b_rx_flex_desc *rx_desc;
@ -479,8 +492,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
xdp = ice_xdp_buf(rx_ring, ntu);
if (ntu + count >= rx_ring->count) {
nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
rx_desc,
nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc,
rx_ring->count - ntu);
if (nb_buffs_extra != rx_ring->count - ntu) {
ntu += nb_buffs_extra;
@ -493,7 +505,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
ice_release_rx_desc(rx_ring, 0);
}
nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
ntu += nb_buffs;
if (ntu == rx_ring->count)
@ -509,6 +521,7 @@ exit:
/**
* ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
* @rx_ring: Rx ring
* @xsk_pool: XSK buffer pool to pick buffers to be filled by HW
* @count: The number of buffers to allocate
*
* Wrapper for internal allocation routine; figure out how many tail
@ -516,7 +529,8 @@ exit:
*
* Returns true if all calls to internal alloc routine succeeded
*/
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool, u16 count)
{
u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
u16 leftover, i, tail_bumps;
@ -525,9 +539,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
leftover = count - (tail_bumps * rx_thresh);
for (i = 0; i < tail_bumps; i++)
if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh))
return false;
return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover);
}
/**
@ -596,8 +610,10 @@ out:
/**
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
* @xdp_ring: XDP Tx ring
* @xsk_pool: AF_XDP buffer pool pointer
*/
static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
@ -648,7 +664,7 @@ skip:
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
xsk_tx_completed(xsk_pool, xsk_frames);
return completed_frames;
}
@ -657,6 +673,7 @@ skip:
* ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX
* @xdp: XDP buffer to xmit
* @xdp_ring: XDP ring to produce descriptor onto
* @xsk_pool: AF_XDP buffer pool pointer
*
* note that this function works directly on xdp_buff, no need to convert
* it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning
@ -666,7 +683,8 @@ skip:
* was not enough space on XDP ring
*/
static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
struct ice_tx_ring *xdp_ring)
struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool)
{
struct skb_shared_info *sinfo = NULL;
u32 size = xdp->data_end - xdp->data;
@ -680,7 +698,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
free_space = ICE_DESC_UNUSED(xdp_ring);
if (free_space < ICE_RING_QUARTER(xdp_ring))
free_space += ice_clean_xdp_irq_zc(xdp_ring);
free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool);
if (unlikely(!free_space))
goto busy;
@ -700,7 +718,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
dma_addr_t dma;
dma = xsk_buff_xdp_get_dma(xdp);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size);
tx_buf->xdp = xdp;
tx_buf->type = ICE_TX_BUF_XSK_TX;
@ -742,12 +760,14 @@ busy:
* @xdp: xdp_buff used as input to the XDP program
* @xdp_prog: XDP program to run
* @xdp_ring: ring to be used for XDP_TX action
* @xsk_pool: AF_XDP buffer pool pointer
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
static int
ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool)
{
int err, result = ICE_XDP_PASS;
u32 act;
@ -758,7 +778,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (!err)
return ICE_XDP_REDIR;
if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS)
result = ICE_XDP_EXIT;
else
result = ICE_XDP_CONSUMED;
@ -769,7 +789,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
case XDP_PASS:
break;
case XDP_TX:
result = ice_xmit_xdp_tx_zc(xdp, xdp_ring);
result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool);
if (result == ICE_XDP_CONSUMED)
goto out_failure;
break;
@ -821,14 +841,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
/**
* ice_clean_rx_irq_zc - consumes packets from the hardware ring
* @rx_ring: AF_XDP Rx ring
* @xsk_pool: AF_XDP buffer pool pointer
* @budget: NAPI budget
*
* Returns number of processed packets on success, remaining budget on failure.
*/
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool,
int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool;
u32 ntc = rx_ring->next_to_clean;
u32 ntu = rx_ring->next_to_use;
struct xdp_buff *first = NULL;
@ -891,7 +913,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
if (ice_is_non_eop(rx_ring, rx_desc))
continue;
xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
xsk_pool);
if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
xdp_xmit |= xdp_res;
} else if (xdp_res == ICE_XDP_EXIT) {
@ -940,7 +963,8 @@ construct_skb:
rx_ring->next_to_clean = ntc;
entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring);
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool,
entries_to_alloc);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
@ -963,17 +987,19 @@ construct_skb:
/**
* ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
* @xdp_ring: XDP ring to produce the HW Tx descriptor on
* @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW
* @desc: AF_XDP descriptor to pull the DMA address and length from
* @total_bytes: bytes accumulator that will be used for stats update
*/
static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc,
unsigned int *total_bytes)
{
struct ice_tx_desc *tx_desc;
dma_addr_t dma;
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr);
xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len);
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
tx_desc->buf_addr = cpu_to_le64(dma);
@ -986,10 +1012,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
/**
* ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
* @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW
* @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
* @total_bytes: bytes accumulator that will be used for stats update
*/
static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool,
struct xdp_desc *descs,
unsigned int *total_bytes)
{
u16 ntu = xdp_ring->next_to_use;
@ -999,8 +1028,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
dma_addr_t dma;
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
tx_desc->buf_addr = cpu_to_le64(dma);
@ -1016,60 +1045,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de
/**
* ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
* @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW
* @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
* @nb_pkts: count of packets to be send
* @total_bytes: bytes accumulator that will be used for stats update
*/
static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
u32 nb_pkts, unsigned int *total_bytes)
static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring,
struct xsk_buff_pool *xsk_pool,
struct xdp_desc *descs, u32 nb_pkts,
unsigned int *total_bytes)
{
u32 batched, leftover, i;
batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
leftover = nb_pkts & (PKTS_PER_BATCH - 1);
for (i = 0; i < batched; i += PKTS_PER_BATCH)
ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes);
for (; i < batched + leftover; i++)
ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes);
}
/**
* ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
* @xsk_pool: AF_XDP buffer pool pointer
*
* Returns true if there is no more work that needs to be done, false otherwise
*/
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool)
{
struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
struct xdp_desc *descs = xsk_pool->tx_descs;
u32 nb_pkts, nb_processed = 0;
unsigned int total_bytes = 0;
int budget;
ice_clean_xdp_irq_zc(xdp_ring);
ice_clean_xdp_irq_zc(xdp_ring, xsk_pool);
if (!netif_carrier_ok(xdp_ring->vsi->netdev) ||
!netif_running(xdp_ring->vsi->netdev))
return true;
budget = ICE_DESC_UNUSED(xdp_ring);
budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
if (!nb_pkts)
return true;
if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
nb_processed = xdp_ring->count - xdp_ring->next_to_use;
ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed,
&total_bytes);
xdp_ring->next_to_use = 0;
}
ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
&total_bytes);
ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed],
nb_pkts - nb_processed, &total_bytes);
ice_set_rs_bit(xdp_ring);
ice_xdp_ring_update_tail(xdp_ring);
ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
if (xsk_uses_need_wakeup(xsk_pool))
xsk_set_tx_need_wakeup(xsk_pool);
return nb_pkts < budget;
}
@ -1091,7 +1129,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
struct ice_vsi *vsi = np->vsi;
struct ice_tx_ring *ring;
if (test_bit(ICE_VSI_DOWN, vsi->state))
if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev))
return -ENETDOWN;
if (!ice_is_xdp_ena_vsi(vsi))
@ -1102,7 +1140,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
ring = vsi->rx_rings[queue_id]->xdp_ring;
if (!ring->xsk_pool)
if (!READ_ONCE(ring->xsk_pool))
return -EINVAL;
/* The idea here is that if NAPI is running, mark a miss, so

View File

@ -20,16 +20,20 @@ struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
u16 qid);
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget);
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool,
int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool, u16 count);
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool);
int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
#else
static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
struct xsk_buff_pool __always_unused *xsk_pool)
{
return false;
}
@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
static inline int
ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
struct xsk_buff_pool __always_unused *xsk_pool,
int __always_unused budget)
{
return 0;
@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
static inline bool
ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring,
struct xsk_buff_pool __always_unused *xsk_pool,
u16 __always_unused count)
{
return false;