i40e, xsk: Terminate Rx side of NAPI when XSK Rx queue gets full

When the XSK pool uses the need_wakeup feature, correlate -ENOBUFS that
was returned from xdp_do_redirect() with an XSK Rx queue being full. In
such a case, terminate the Rx processing that is being done on the
current HW Rx ring and let user space consume descriptors from the XSK
Rx queue so that there is room that the driver can use later on.

Introduce a new internal return code, I40E_XDP_EXIT, that indicates the
case described above.

Note that it does not affect Tx processing that is bound to the same
NAPI context, nor the other Rx rings.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220413153015.453864-7-maciej.fijalkowski@intel.com
This commit is contained in:
Maciej Fijalkowski 2022-04-13 17:30:07 +02:00 committed by Daniel Borkmann
parent 50ae066480
commit b8aef650e5
2 changed files with 23 additions and 10 deletions

View File

@ -20,6 +20,7 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val);
#define I40E_XDP_CONSUMED BIT(0) #define I40E_XDP_CONSUMED BIT(0)
#define I40E_XDP_TX BIT(1) #define I40E_XDP_TX BIT(1)
#define I40E_XDP_REDIR BIT(2) #define I40E_XDP_REDIR BIT(2)
#define I40E_XDP_EXIT BIT(3)
/* /*
* build_ctob - Builds the Tx descriptor (cmd, offset and type) qword * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword

View File

@ -161,9 +161,13 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
if (likely(act == XDP_REDIRECT)) { if (likely(act == XDP_REDIRECT)) {
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (err) if (!err)
goto out_failure; return I40E_XDP_REDIR;
return I40E_XDP_REDIR; if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
result = I40E_XDP_EXIT;
else
result = I40E_XDP_CONSUMED;
goto out_failure;
} }
switch (act) { switch (act) {
@ -175,16 +179,17 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
if (result == I40E_XDP_CONSUMED) if (result == I40E_XDP_CONSUMED)
goto out_failure; goto out_failure;
break; break;
case XDP_DROP:
result = I40E_XDP_CONSUMED;
break;
default: default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
fallthrough; fallthrough;
case XDP_ABORTED: case XDP_ABORTED:
result = I40E_XDP_CONSUMED;
out_failure: out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act); trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough; /* handle aborts by dropping packet */ fallthrough; /* handle aborts by dropping packet */
case XDP_DROP:
result = I40E_XDP_CONSUMED;
break;
} }
return result; return result;
} }
@ -271,7 +276,8 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
unsigned int *rx_packets, unsigned int *rx_packets,
unsigned int *rx_bytes, unsigned int *rx_bytes,
unsigned int size, unsigned int size,
unsigned int xdp_res) unsigned int xdp_res,
bool *failure)
{ {
struct sk_buff *skb; struct sk_buff *skb;
@ -281,11 +287,15 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX) if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
return; return;
if (xdp_res == I40E_XDP_EXIT) {
*failure = true;
return;
}
if (xdp_res == I40E_XDP_CONSUMED) { if (xdp_res == I40E_XDP_CONSUMED) {
xsk_buff_free(xdp_buff); xsk_buff_free(xdp_buff);
return; return;
} }
if (xdp_res == I40E_XDP_PASS) { if (xdp_res == I40E_XDP_PASS) {
/* NB! We are not checking for errors using /* NB! We are not checking for errors using
* i40e_test_staterr with * i40e_test_staterr with
@ -371,7 +381,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
xdp_res = i40e_run_xdp_zc(rx_ring, bi); xdp_res = i40e_run_xdp_zc(rx_ring, bi);
i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets, i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets,
&rx_bytes, size, xdp_res); &rx_bytes, size, xdp_res, &failure);
if (failure)
break;
total_rx_packets += rx_packets; total_rx_packets += rx_packets;
total_rx_bytes += rx_bytes; total_rx_bytes += rx_bytes;
xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
@ -382,7 +394,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask;
if (cleaned_count >= I40E_RX_BUFFER_WRITE) if (cleaned_count >= I40E_RX_BUFFER_WRITE)
failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); failure |= !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count);
i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);