Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
ice: fix synchronization between .ndo_bpf() and reset

Larysa Zaremba says:

PF reset can be triggered asynchronously, by tx_timeout or by a user. With some
unfortunate timings both ice_vsi_rebuild() and .ndo_bpf will try to access and
modify XDP rings at the same time, causing system crash.

The first patch factors out rtnl-locked code from VSI rebuild code to avoid
deadlock. The following changes lock rebuild and .ndo_bpf() critical sections
with an internal mutex as well and provide complementary fixes.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: do not bring the VSI up, if it was down before the XDP setup
  ice: remove ICE_CFG_BUSY locking from AF_XDP code
  ice: check ICE_VSI_DOWN under rtnl_lock when preparing for reset
  ice: check for XDP rings instead of bpf program when unconfiguring
  ice: protect XDP configuration with a mutex
  ice: move netif_queue_set_napi to rtnl-protected sections
====================

Link: https://patch.msgid.link/20240903183034.3530411-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-09-04 17:37:37 -07:00
commit f0417c50fd
6 changed files with 117 additions and 172 deletions

View File

@ -318,6 +318,7 @@ enum ice_vsi_state {
ICE_VSI_UMAC_FLTR_CHANGED,
ICE_VSI_MMAC_FLTR_CHANGED,
ICE_VSI_PROMISC_CHANGED,
ICE_VSI_REBUILD_PENDING,
ICE_VSI_STATE_NBITS /* must be last */
};
@ -411,6 +412,7 @@ struct ice_vsi {
struct ice_tx_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
struct mutex xdp_state_lock;
struct net_device **target_netdevs;

View File

@ -190,16 +190,11 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
}
q_vector = vsi->q_vectors[v_idx];
ice_for_each_tx_ring(tx_ring, q_vector->tx) {
ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX,
NULL);
ice_for_each_tx_ring(tx_ring, vsi->q_vectors[v_idx]->tx)
tx_ring->q_vector = NULL;
}
ice_for_each_rx_ring(rx_ring, q_vector->rx) {
ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX,
NULL);
ice_for_each_rx_ring(rx_ring, vsi->q_vectors[v_idx]->rx)
rx_ring->q_vector = NULL;
}
/* only VSI with an associated netdev is set up with NAPI */
if (vsi->netdev)

View File

@ -447,6 +447,7 @@ static void ice_vsi_free(struct ice_vsi *vsi)
ice_vsi_free_stats(vsi);
ice_vsi_free_arrays(vsi);
mutex_destroy(&vsi->xdp_state_lock);
mutex_unlock(&pf->sw_mutex);
devm_kfree(dev, vsi);
}
@ -626,6 +627,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf)
pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
pf->next_vsi);
mutex_init(&vsi->xdp_state_lock);
unlock_pf:
mutex_unlock(&pf->sw_mutex);
return vsi;
@ -2286,9 +2289,6 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
ice_vsi_map_rings_to_vectors(vsi);
/* Associate q_vector rings to napi */
ice_vsi_set_napi_queues(vsi);
vsi->stat_offsets_loaded = false;
/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
@ -2426,7 +2426,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
dev_err(ice_pf_to_dev(pf), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
vsi->vsi_num, err);
if (ice_is_xdp_ena_vsi(vsi))
if (vsi->xdp_rings)
/* return value check can be skipped here, it always returns
* 0 if reset is in progress
*/
@ -2528,7 +2528,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
for (q = 0; q < q_vector->num_ring_tx; q++) {
ice_write_itr(&q_vector->tx, 0);
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
if (ice_is_xdp_ena_vsi(vsi)) {
if (vsi->xdp_rings) {
u32 xdp_txq = txq + vsi->num_xdp_txq;
wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
@ -2628,6 +2628,7 @@ void ice_vsi_close(struct ice_vsi *vsi)
if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
ice_down(vsi);
ice_vsi_clear_napi_queues(vsi);
ice_vsi_free_irq(vsi);
ice_vsi_free_tx_rings(vsi);
ice_vsi_free_rx_rings(vsi);
@ -2671,8 +2672,7 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
*/
void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
{
if (test_bit(ICE_VSI_DOWN, vsi->state))
return;
bool already_down = test_bit(ICE_VSI_DOWN, vsi->state);
set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
@ -2680,134 +2680,70 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
if (netif_running(vsi->netdev)) {
if (!locked)
rtnl_lock();
ice_vsi_close(vsi);
already_down = test_bit(ICE_VSI_DOWN, vsi->state);
if (!already_down)
ice_vsi_close(vsi);
if (!locked)
rtnl_unlock();
} else {
} else if (!already_down) {
ice_vsi_close(vsi);
}
} else if (vsi->type == ICE_VSI_CTRL) {
} else if (vsi->type == ICE_VSI_CTRL && !already_down) {
ice_vsi_close(vsi);
}
}
/**
* __ice_queue_set_napi - Set the napi instance for the queue
* @dev: device to which NAPI and queue belong
* @queue_index: Index of queue
* @type: queue type as RX or TX
* @napi: NAPI context
* @locked: is the rtnl_lock already held
*
* Set the napi instance for the queue. Caller indicates the lock status.
*/
static void
__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
enum netdev_queue_type type, struct napi_struct *napi,
bool locked)
{
if (!locked)
rtnl_lock();
netif_queue_set_napi(dev, queue_index, type, napi);
if (!locked)
rtnl_unlock();
}
/**
* ice_queue_set_napi - Set the napi instance for the queue
* @vsi: VSI being configured
* @queue_index: Index of queue
* @type: queue type as RX or TX
* @napi: NAPI context
*
* Set the napi instance for the queue. The rtnl lock state is derived from the
* execution path.
*/
void
ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
enum netdev_queue_type type, struct napi_struct *napi)
{
struct ice_pf *pf = vsi->back;
if (!vsi->netdev)
return;
if (current_work() == &pf->serv_task ||
test_bit(ICE_PREPARED_FOR_RESET, pf->state) ||
test_bit(ICE_DOWN, pf->state) ||
test_bit(ICE_SUSPENDED, pf->state))
__ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
false);
else
__ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
true);
}
/**
* __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
* @q_vector: q_vector pointer
* @locked: is the rtnl_lock already held
*
* Associate the q_vector napi with all the queue[s] on the vector.
* Caller indicates the lock status.
*/
void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
{
struct ice_rx_ring *rx_ring;
struct ice_tx_ring *tx_ring;
ice_for_each_rx_ring(rx_ring, q_vector->rx)
__ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index,
NETDEV_QUEUE_TYPE_RX, &q_vector->napi,
locked);
ice_for_each_tx_ring(tx_ring, q_vector->tx)
__ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index,
NETDEV_QUEUE_TYPE_TX, &q_vector->napi,
locked);
/* Also set the interrupt number for the NAPI */
netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
}
/**
* ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
* @q_vector: q_vector pointer
*
* Associate the q_vector napi with all the queue[s] on the vector
*/
void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector)
{
struct ice_rx_ring *rx_ring;
struct ice_tx_ring *tx_ring;
ice_for_each_rx_ring(rx_ring, q_vector->rx)
ice_queue_set_napi(q_vector->vsi, rx_ring->q_index,
NETDEV_QUEUE_TYPE_RX, &q_vector->napi);
ice_for_each_tx_ring(tx_ring, q_vector->tx)
ice_queue_set_napi(q_vector->vsi, tx_ring->q_index,
NETDEV_QUEUE_TYPE_TX, &q_vector->napi);
/* Also set the interrupt number for the NAPI */
netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
}
/**
* ice_vsi_set_napi_queues
* ice_vsi_set_napi_queues - associate netdev queues with napi
* @vsi: VSI pointer
*
* Associate queue[s] with napi for all vectors
* Associate queue[s] with napi for all vectors.
* The caller must hold rtnl_lock.
*/
void ice_vsi_set_napi_queues(struct ice_vsi *vsi)
{
int i;
struct net_device *netdev = vsi->netdev;
int q_idx, v_idx;
if (!vsi->netdev)
if (!netdev)
return;
ice_for_each_q_vector(vsi, i)
ice_q_vector_set_napi_queues(vsi->q_vectors[i]);
ice_for_each_rxq(vsi, q_idx)
netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX,
&vsi->rx_rings[q_idx]->q_vector->napi);
ice_for_each_txq(vsi, q_idx)
netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX,
&vsi->tx_rings[q_idx]->q_vector->napi);
/* Also set the interrupt number for the NAPI */
ice_for_each_q_vector(vsi, v_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
}
}
/**
* ice_vsi_clear_napi_queues - dissociate netdev queues from napi
* @vsi: VSI pointer
*
* Clear the association between all VSI queues queue[s] and napi.
* The caller must hold rtnl_lock.
*/
void ice_vsi_clear_napi_queues(struct ice_vsi *vsi)
{
struct net_device *netdev = vsi->netdev;
int q_idx;
if (!netdev)
return;
ice_for_each_txq(vsi, q_idx)
netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_TX, NULL);
ice_for_each_rxq(vsi, q_idx)
netif_queue_set_napi(netdev, q_idx, NETDEV_QUEUE_TYPE_RX, NULL);
}
/**
@ -3039,19 +2975,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
return -EINVAL;
mutex_lock(&vsi->xdp_state_lock);
ret = ice_vsi_realloc_stat_arrays(vsi);
if (ret)
goto err_vsi_cfg;
goto unlock;
ice_vsi_decfg(vsi);
ret = ice_vsi_cfg_def(vsi);
if (ret)
goto err_vsi_cfg;
goto unlock;
coalesce = kcalloc(vsi->num_q_vectors,
sizeof(struct ice_coalesce_stored), GFP_KERNEL);
if (!coalesce)
return -ENOMEM;
if (!coalesce) {
ret = -ENOMEM;
goto decfg;
}
prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
@ -3059,22 +2999,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (ret) {
if (vsi_flags & ICE_VSI_FLAG_INIT) {
ret = -EIO;
goto err_vsi_cfg_tc_lan;
goto free_coalesce;
}
kfree(coalesce);
return ice_schedule_reset(pf, ICE_RESET_PFR);
ret = ice_schedule_reset(pf, ICE_RESET_PFR);
goto free_coalesce;
}
ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
kfree(coalesce);
clear_bit(ICE_VSI_REBUILD_PENDING, vsi->state);
return 0;
err_vsi_cfg_tc_lan:
ice_vsi_decfg(vsi);
free_coalesce:
kfree(coalesce);
err_vsi_cfg:
decfg:
if (ret)
ice_vsi_decfg(vsi);
unlock:
mutex_unlock(&vsi->xdp_state_lock);
return ret;
}

View File

@ -44,16 +44,10 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params);
void
ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
enum netdev_queue_type type, struct napi_struct *napi);
void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked);
void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector);
void ice_vsi_set_napi_queues(struct ice_vsi *vsi);
void ice_vsi_clear_napi_queues(struct ice_vsi *vsi);
int ice_vsi_release(struct ice_vsi *vsi);
void ice_vsi_close(struct ice_vsi *vsi);

View File

@ -616,6 +616,7 @@ skip:
/* clear SW filtering DB */
ice_clear_hw_tbls(hw);
/* disable the VSIs and their queues that are not already DOWN */
set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state);
ice_pf_dis_all_vsi(pf, false);
if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
@ -3004,8 +3005,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
bool if_running;
if (prog && !prog->aux->xdp_has_frags) {
if (frame_size > ice_max_xdp_frame_size(vsi)) {
@ -3016,13 +3017,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
}
/* hot swap progs and avoid toggling link */
if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
if (ice_is_xdp_ena_vsi(vsi) == !!prog ||
test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) {
ice_vsi_assign_bpf_prog(vsi, prog);
return 0;
}
if_running = netif_running(vsi->netdev) &&
!test_and_set_bit(ICE_VSI_DOWN, vsi->state);
/* need to stop netdev while setting up the program for Rx rings */
if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
if (if_running) {
ret = ice_down(vsi);
if (ret) {
NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
@ -3088,21 +3093,28 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct ice_netdev_priv *np = netdev_priv(dev);
struct ice_vsi *vsi = np->vsi;
int ret;
if (vsi->type != ICE_VSI_PF) {
NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
return -EINVAL;
}
mutex_lock(&vsi->xdp_state_lock);
switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
break;
case XDP_SETUP_XSK_POOL:
return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
xdp->xsk.queue_id);
ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id);
break;
default:
return -EINVAL;
ret = -EINVAL;
}
mutex_unlock(&vsi->xdp_state_lock);
return ret;
}
/**
@ -3558,11 +3570,9 @@ static void ice_napi_add(struct ice_vsi *vsi)
if (!vsi->netdev)
return;
ice_for_each_q_vector(vsi, v_idx) {
ice_for_each_q_vector(vsi, v_idx)
netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
ice_napi_poll);
__ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
}
}
/**
@ -5540,7 +5550,9 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
if (ret)
goto err_reinit;
ice_vsi_map_rings_to_vectors(pf->vsi[v]);
rtnl_lock();
ice_vsi_set_napi_queues(pf->vsi[v]);
rtnl_unlock();
}
ret = ice_req_irq_msix_misc(pf);
@ -5554,8 +5566,12 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
err_reinit:
while (v--)
if (pf->vsi[v])
if (pf->vsi[v]) {
rtnl_lock();
ice_vsi_clear_napi_queues(pf->vsi[v]);
rtnl_unlock();
ice_vsi_free_q_vectors(pf->vsi[v]);
}
return ret;
}
@ -5620,6 +5636,9 @@ static int ice_suspend(struct device *dev)
ice_for_each_vsi(pf, v) {
if (!pf->vsi[v])
continue;
rtnl_lock();
ice_vsi_clear_napi_queues(pf->vsi[v]);
rtnl_unlock();
ice_vsi_free_q_vectors(pf->vsi[v]);
}
ice_clear_interrupt_scheme(pf);
@ -7233,7 +7252,7 @@ int ice_down(struct ice_vsi *vsi)
if (tx_err)
netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
if (!tx_err && vsi->xdp_rings) {
tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
if (tx_err)
netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
@ -7250,7 +7269,7 @@ int ice_down(struct ice_vsi *vsi)
ice_for_each_txq(vsi, i)
ice_clean_tx_ring(vsi->tx_rings[i]);
if (ice_is_xdp_ena_vsi(vsi))
if (vsi->xdp_rings)
ice_for_each_xdp_txq(vsi, i)
ice_clean_tx_ring(vsi->xdp_rings[i]);
@ -7455,6 +7474,8 @@ int ice_vsi_open(struct ice_vsi *vsi)
err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
if (err)
goto err_set_qs;
ice_vsi_set_napi_queues(vsi);
}
err = ice_up_complete(vsi);

View File

@ -39,7 +39,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats));
memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0,
sizeof(vsi_stat->tx_ring_stats[q_idx]->stats));
if (ice_is_xdp_ena_vsi(vsi))
if (vsi->xdp_rings)
memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0,
sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats));
}
@ -52,7 +52,7 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
{
ice_clean_tx_ring(vsi->tx_rings[q_idx]);
if (ice_is_xdp_ena_vsi(vsi))
if (vsi->xdp_rings)
ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
ice_clean_rx_ring(vsi->rx_rings[q_idx]);
}
@ -165,7 +165,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
struct ice_q_vector *q_vector;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
int timeout = 50;
int fail = 0;
int err;
@ -176,13 +175,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
rx_ring = vsi->rx_rings[q_idx];
q_vector = rx_ring->q_vector;
while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
timeout--;
if (!timeout)
return -EBUSY;
usleep_range(1000, 2000);
}
synchronize_net();
netif_carrier_off(vsi->netdev);
netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
@ -194,7 +186,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
if (!fail)
fail = err;
if (ice_is_xdp_ena_vsi(vsi)) {
if (vsi->xdp_rings) {
struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
memset(&txq_meta, 0, sizeof(txq_meta));
@ -261,7 +253,6 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
netif_carrier_on(vsi->netdev);
}
clear_bit(ICE_CFG_BUSY, vsi->state);
return fail;
}
@ -390,7 +381,8 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
goto failure;
}
if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
if_running = !test_bit(ICE_VSI_DOWN, vsi->state) &&
ice_is_xdp_ena_vsi(vsi);
if (if_running) {
struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];