// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */

#include "ice.h"
#include "ice_base.h"
#include "ice_flow.h"
#include "ice_lib.h"
#include "ice_fltr.h"
#include "ice_dcb_lib.h"
#include "ice_devlink.h"
#include "ice_vsi_vlan_ops.h"
/**
 * ice_vsi_type_str - maps VSI type enum to string equivalents
 * @vsi_type: VSI type enum
 */
const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
{
	switch (vsi_type) {
	case ICE_VSI_PF:
		return "ICE_VSI_PF";
	case ICE_VSI_VF:
		return "ICE_VSI_VF";
	case ICE_VSI_CTRL:
		return "ICE_VSI_CTRL";
	case ICE_VSI_CHNL:
		return "ICE_VSI_CHNL";
	case ICE_VSI_LB:
		return "ICE_VSI_LB";
	case ICE_VSI_SWITCHDEV_CTRL:
		return "ICE_VSI_SWITCHDEV_CTRL";
	default:
		return "unknown";
	}
}

/**
 * ice_vsi_ctrl_all_rx_rings - Start or stop a VSI's Rx rings
 * @vsi: the VSI being configured
 * @ena: start or stop the Rx rings
 *
 * First enable/disable all of the Rx rings, flush any remaining writes, and
 * then verify that they have all been enabled/disabled successfully. This will
 * let all of the register writes complete when enabling/disabling the Rx rings
 * before waiting for the change in hardware to complete.
 */
static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena)
{
	int ret = 0;
	u16 i;

	ice_for_each_rxq(vsi, i)
		ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false);

	ice_flush(&vsi->back->hw);

	ice_for_each_rxq(vsi, i) {
		ret = ice_vsi_wait_one_rx_ring(vsi, ena, i);
		if (ret)
			break;
	}

	return ret;
}

/**
 * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the VSI
 * @vsi: VSI pointer
 *
 * On error: returns error code (negative)
 * On success: returns 0
 */
static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;

	dev = ice_pf_to_dev(pf);
	if (vsi->type == ICE_VSI_CHNL)
		return 0;

	/* allocate memory for both Tx and Rx ring pointers */
	vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
				     sizeof(*vsi->tx_rings), GFP_KERNEL);
	if (!vsi->tx_rings)
		return -ENOMEM;

	vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
				     sizeof(*vsi->rx_rings), GFP_KERNEL);
	if (!vsi->rx_rings)
		goto err_rings;

	/* txq_map needs to have enough space to track both Tx (stack) rings
	 * and XDP rings; at this point vsi->num_xdp_txq might not be set,
	 * so use num_possible_cpus() as we want to always provide XDP ring
	 * per CPU, regardless of queue count settings from user that might
	 * have come from ethtool's set_channels() callback;
	 */
	vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
				    sizeof(*vsi->txq_map), GFP_KERNEL);

	if (!vsi->txq_map)
		goto err_txq_map;

	vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
				    sizeof(*vsi->rxq_map), GFP_KERNEL);
	if (!vsi->rxq_map)
		goto err_rxq_map;

	/* There is no need to allocate q_vectors for a loopback VSI. */
	if (vsi->type == ICE_VSI_LB)
		return 0;

	/* allocate memory for q_vector pointers */
	vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
				      sizeof(*vsi->q_vectors), GFP_KERNEL);
	if (!vsi->q_vectors)
		goto err_vectors;
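
	/* bitmap tracking which queue pairs have AF_XDP zero-copy enabled */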
	vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
	if (!vsi->af_xdp_zc_qps)
		goto err_zc_qps;

	return 0;

err_zc_qps:
	devm_kfree(dev, vsi->q_vectors);
err_vectors:
	devm_kfree(dev, vsi->rxq_map);
err_rxq_map:
	devm_kfree(dev, vsi->txq_map);
err_txq_map:
	devm_kfree(dev, vsi->rx_rings);
err_rings:
	devm_kfree(dev, vsi->tx_rings);
	return -ENOMEM;
}

/**
 * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI
 * @vsi: the VSI being configured
 */
static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
{
	switch (vsi->type) {
	case ICE_VSI_PF:
	case ICE_VSI_SWITCHDEV_CTRL:
	case ICE_VSI_CTRL:
	case ICE_VSI_LB:
		/* a user could change the values of num_[tr]x_desc using
		 * ethtool -G so we should keep those values instead of
		 * overwriting them with the defaults.
		 */
		if (!vsi->num_rx_desc)
			vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
		if (!vsi->num_tx_desc)
			vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
		break;
	default:
		dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n",
			vsi->type);
		break;
	}
}

/**
 * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
 * @vsi: the VSI being configured
 * @vf: the VF associated with this VSI, if any
 */
static void ice_vsi_set_num_qs(struct ice_vsi *vsi, struct ice_vf *vf)
{
	enum ice_vsi_type vsi_type = vsi->type;
	struct ice_pf *pf = vsi->back;

	if (WARN_ON(vsi_type == ICE_VSI_VF && !vf))
		return;

	switch (vsi_type) {
	case ICE_VSI_PF:
		if (vsi->req_txq) {
			vsi->alloc_txq = vsi->req_txq;
			vsi->num_txq = vsi->req_txq;
		} else {
			vsi->alloc_txq = min3(pf->num_lan_msix,
					      ice_get_avail_txq_count(pf),
					      (u16)num_online_cpus());
		}

		pf->num_lan_tx = vsi->alloc_txq;

		/* only 1 Rx queue unless RSS is enabled */
		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
			vsi->alloc_rxq = 1;
		} else {
			if (vsi->req_rxq) {
				vsi->alloc_rxq = vsi->req_rxq;
				vsi->num_rxq = vsi->req_rxq;
			} else {
				vsi->alloc_rxq = min3(pf->num_lan_msix,
						      ice_get_avail_rxq_count(pf),
						      (u16)num_online_cpus());
			}
		}

		pf->num_lan_rx = vsi->alloc_rxq;

		vsi->num_q_vectors = min_t(int, pf->num_lan_msix,
					   max_t(int, vsi->alloc_rxq,
						 vsi->alloc_txq));
		break;
	case ICE_VSI_SWITCHDEV_CTRL:
		/* The number of queues for ctrl VSI is equal to number of VFs.
		 * Each ring is associated to the corresponding VF_PR netdev.
		 */
		vsi->alloc_txq = ice_get_num_vfs(pf);
		vsi->alloc_rxq = vsi->alloc_txq;
		vsi->num_q_vectors = 1;
		break;
	case ICE_VSI_VF:
		if (vf->num_req_qs)
			vf->num_vf_qs = vf->num_req_qs;
		vsi->alloc_txq = vf->num_vf_qs;
		vsi->alloc_rxq = vf->num_vf_qs;
		/* pf->vfs.num_msix_per includes (VF miscellaneous vector +
		 * data queue interrupts). Since vsi->num_q_vectors is number
		 * of queue vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
		 * original vector count
		 */
		vsi->num_q_vectors = pf->vfs.num_msix_per - ICE_NONQ_VECS_VF;
		break;
	case ICE_VSI_CTRL:
		vsi->alloc_txq = 1;
		vsi->alloc_rxq = 1;
		vsi->num_q_vectors = 1;
		break;
	case ICE_VSI_CHNL:
		vsi->alloc_txq = 0;
		vsi->alloc_rxq = 0;
		break;
	case ICE_VSI_LB:
		vsi->alloc_txq = 1;
		vsi->alloc_rxq = 1;
		break;
	default:
		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type);
		break;
	}

	ice_vsi_set_num_desc(vsi);
}

/**
 * ice_get_free_slot - get the next non-NULL location index in array
 * @array: array to search
 * @size: size of the array
 * @curr: last known occupied index to be used as a search hint
 *
 * void * is being used to keep the functionality generic. This lets us use this
 * function on any array of pointers.
 */
static int ice_get_free_slot(void *array, int size, int curr)
{
	int **tmp_array = (int **)array;
	int next;

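	/* try the slot right after the search hint before falling back to a
	 * linear scan from the start of the array
	 */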
	if (curr < (size - 1) && !tmp_array[curr + 1]) {
		next = curr + 1;
	} else {
		int i = 0;

		while ((i < size) && (tmp_array[i]))
			i++;
		if (i == size)
			next = ICE_NO_VSI;
		else
			next = i;
	}
	return next;
}

/**
 * ice_vsi_delete - delete a VSI from the switch
 * @vsi: pointer to VSI being removed
 */
void ice_vsi_delete(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_vsi_ctx *ctxt;
	int status;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		return;

	if (vsi->type == ICE_VSI_VF)
		ctxt->vf_num = vsi->vf->vf_id;
	ctxt->vsi_num = vsi->vsi_num;

	memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info));

	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
	if (status)
		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
			vsi->vsi_num, status);

	kfree(ctxt);
}

/**
 * ice_vsi_free_arrays - De-allocate queue and vector pointer arrays for the VSI
 * @vsi: pointer to VSI being cleared
 */
static void ice_vsi_free_arrays(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;

	dev = ice_pf_to_dev(pf);

	if (vsi->af_xdp_zc_qps) {
		bitmap_free(vsi->af_xdp_zc_qps);
		vsi->af_xdp_zc_qps = NULL;
	}
	/* free the ring and vector containers */
	if (vsi->q_vectors) {
		devm_kfree(dev, vsi->q_vectors);
		vsi->q_vectors = NULL;
	}
	if (vsi->tx_rings) {
		devm_kfree(dev, vsi->tx_rings);
		vsi->tx_rings = NULL;
	}
	if (vsi->rx_rings) {
		devm_kfree(dev, vsi->rx_rings);
		vsi->rx_rings = NULL;
	}
	if (vsi->txq_map) {
		devm_kfree(dev, vsi->txq_map);
		vsi->txq_map = NULL;
	}
	if (vsi->rxq_map) {
		devm_kfree(dev, vsi->rxq_map);
		vsi->rxq_map = NULL;
	}
}

/**
 * ice_vsi_clear - clean up and deallocate the provided VSI
 * @vsi: pointer to VSI being cleared
 *
 * This deallocates the VSI's queue resources, removes it from the PF's
 * VSI array if necessary, and deallocates the VSI
 *
 * Returns 0 on success, negative on failure
 */
int ice_vsi_clear(struct ice_vsi *vsi)
{
	struct ice_pf *pf = NULL;
	struct device *dev;

	if (!vsi)
		return 0;

	if (!vsi->back)
		return -EINVAL;

	pf = vsi->back;
	dev = ice_pf_to_dev(pf);

	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
		dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
		return -EINVAL;
	}

	mutex_lock(&pf->sw_mutex);
	/* updates the PF for this cleared VSI */

	pf->vsi[vsi->idx] = NULL;
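	/* the PF control VSI always occupies the last VSI slot, so only hand
	 * the slot index back to pf->next_vsi for VSIs allocated from the
	 * regular pool (which includes VF control VSIs)
	 */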
	if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
		pf->next_vsi = vsi->idx;
	if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && vsi->vf)
		pf->next_vsi = vsi->idx;

	ice_vsi_free_arrays(vsi);
	mutex_unlock(&pf->sw_mutex);
	devm_kfree(dev, vsi);

	return 0;
}

/**
 * ice_msix_clean_ctrl_vsi - MSIX mode interrupt handler for ctrl VSI
 * @irq: interrupt number
 * @data: pointer to a q_vector
 */
static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data)
{
	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;

	if (!q_vector->tx.tx_ring)
		return IRQ_HANDLED;

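	/* budget of Rx descriptors to clean per control VSI interrupt */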
#define FDIR_RX_DESC_CLEAN_BUDGET 64
	ice_clean_rx_irq(q_vector->rx.rx_ring, FDIR_RX_DESC_CLEAN_BUDGET);
	ice_clean_ctrl_tx_irq(q_vector->tx.tx_ring);

	return IRQ_HANDLED;
}

/**
 * ice_msix_clean_rings - MSIX mode Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a q_vector
 */
static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
{
	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;

	if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
		return IRQ_HANDLED;

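	/* track events for the DIM (dynamic interrupt moderation) library */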
	q_vector->total_events++;

	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
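
/**
 * ice_eswitch_msix_clean_rings - MSIX mode interrupt handler for eswitch VSI
 * @irq: interrupt number
 * @data: pointer to a q_vector
 *
 * Walk all VFs under RCU protection and schedule NAPI on each VF port
 * representor's queue vector.
 */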
static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data)
{
	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
	struct ice_pf *pf = q_vector->vsi->back;
	struct ice_vf *vf;
	unsigned int bkt;

	if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
		return IRQ_HANDLED;

	rcu_read_lock();
	ice_for_each_vf_rcu(pf, bkt, vf)
		napi_schedule(&vf->repr->q_vector->napi);
	rcu_read_unlock();

	return IRQ_HANDLED;
}

/**
 * ice_vsi_alloc - Allocates the next available struct VSI in the PF
 * @pf: board private structure
 * @vsi_type: type of VSI
 * @ch: ptr to channel
 * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL
 *
 * The VF pointer is used for ICE_VSI_VF and ICE_VSI_CTRL. For ICE_VSI_CTRL,
 * it may be NULL in the case there is no association with a VF. For
 * ICE_VSI_VF the VF pointer *must not* be NULL.
 *
 * returns a pointer to a VSI on success, NULL on failure.
 */
static struct ice_vsi *
ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type,
	      struct ice_channel *ch, struct ice_vf *vf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *vsi = NULL;

	if (WARN_ON(vsi_type == ICE_VSI_VF && !vf))
		return NULL;

	/* Need to protect the allocation of the VSIs at the PF level */
	mutex_lock(&pf->sw_mutex);

	/* If we have already allocated our maximum number of VSIs,
	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
	 * is available to be populated
	 */
	if (pf->next_vsi == ICE_NO_VSI) {
		dev_dbg(dev, "out of VSI slots!\n");
		goto unlock_pf;
	}

	vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
	if (!vsi)
		goto unlock_pf;

	vsi->type = vsi_type;
	vsi->back = pf;
	set_bit(ICE_VSI_DOWN, vsi->state);

	if (vsi_type == ICE_VSI_VF)
		ice_vsi_set_num_qs(vsi, vf);
	else if (vsi_type != ICE_VSI_CHNL)
		ice_vsi_set_num_qs(vsi, NULL);

	switch (vsi->type) {
	case ICE_VSI_SWITCHDEV_CTRL:
		if (ice_vsi_alloc_arrays(vsi))
			goto err_rings;

		/* Setup eswitch MSIX irq handler for VSI */
		vsi->irq_handler = ice_eswitch_msix_clean_rings;
		break;
	case ICE_VSI_PF:
		if (ice_vsi_alloc_arrays(vsi))
			goto err_rings;

		/* Setup default MSIX irq handler for VSI */
		vsi->irq_handler = ice_msix_clean_rings;
		break;
	case ICE_VSI_CTRL:
		if (ice_vsi_alloc_arrays(vsi))
			goto err_rings;

		/* Setup ctrl VSI MSIX irq handler */
		vsi->irq_handler = ice_msix_clean_ctrl_vsi;

		/* For the PF control VSI this is NULL, for the VF control VSI
		 * this will be the first VF to allocate it.
		 */
		vsi->vf = vf;
		break;
	case ICE_VSI_VF:
		if (ice_vsi_alloc_arrays(vsi))
			goto err_rings;
		vsi->vf = vf;
		break;
	case ICE_VSI_CHNL:
		if (!ch)
			goto err_rings;
		vsi->num_rxq = ch->num_rxq;
		vsi->num_txq = ch->num_txq;
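		/* channel VSIs use queues owned by the main PF VSI; record
		 * where this channel's slice of queues begins
		 */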
		vsi->next_base_q = ch->base_q;
		break;
	case ICE_VSI_LB:
		if (ice_vsi_alloc_arrays(vsi))
			goto err_rings;
		break;
	default:
		dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
		goto unlock_pf;
	}

	if (vsi->type == ICE_VSI_CTRL && !vf) {
		/* Use the last VSI slot as the index for PF control VSI */
		vsi->idx = pf->num_alloc_vsi - 1;
		pf->ctrl_vsi_idx = vsi->idx;
		pf->vsi[vsi->idx] = vsi;
	} else {
		/* fill slot and make note of the index */
		vsi->idx = pf->next_vsi;
		pf->vsi[pf->next_vsi] = vsi;

		/* prepare pf->next_vsi for next use */
		pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
						 pf->next_vsi);
	}

	if (vsi->type == ICE_VSI_CTRL && vf)
		vf->ctrl_vsi_idx = vsi->idx;
	goto unlock_pf;

err_rings:
	devm_kfree(dev, vsi);
	vsi = NULL;
unlock_pf:
	mutex_unlock(&pf->sw_mutex);
	return vsi;
}

/**
 * ice_alloc_fd_res - Allocate FD resource for a VSI
 * @vsi: pointer to the ice_vsi
 *
 * This allocates the FD resources
 *
 * Returns 0 on success, -EPERM when FD resources are unavailable or the VSI
 * type does not use them, or -EINVAL on an invalid TC configuration
 */
static int ice_alloc_fd_res(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	u32 g_val, b_val;

	/* Flow Director filters are only allocated/assigned to the PF VSI or
	 * CHNL VSI which passes the traffic. The CTRL VSI is only used to
	 * add/delete filters so resources are not allocated to it
	 */
	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
		return -EPERM;

	if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF ||
	      vsi->type == ICE_VSI_CHNL))
		return -EPERM;

	/* FD filters from guaranteed pool per VSI */
	g_val = pf->hw.func_caps.fd_fltr_guar;
	if (!g_val)
		return -EPERM;

	/* FD filters from best effort pool */
	b_val = pf->hw.func_caps.fd_fltr_best_effort;
	if (!b_val)
		return -EPERM;

	/* PF main VSI gets only 64 FD resources from guaranteed pool
	 * when ADQ is configured.
	 */
#define ICE_PF_VSI_GFLTR	64

	/* determine FD filter resources per VSI from shared (best effort) and
	 * dedicated pool
	 */
	if (vsi->type == ICE_VSI_PF) {
		vsi->num_gfltr = g_val;
		/* if MQPRIO is configured, main VSI doesn't get all FD
		 * resources from guaranteed pool. PF VSI gets 64 FD resources
		 */
		if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
			if (g_val < ICE_PF_VSI_GFLTR)
				return -EPERM;
			/* allow bare minimum entries for PF VSI */
			vsi->num_gfltr = ICE_PF_VSI_GFLTR;
		}

		/* each VSI gets same "best_effort" quota */
		vsi->num_bfltr = b_val;
	} else if (vsi->type == ICE_VSI_VF) {
		vsi->num_gfltr = 0;

		/* each VSI gets same "best_effort" quota */
		vsi->num_bfltr = b_val;
	} else {
		struct ice_vsi *main_vsi;
		int numtc;

		main_vsi = ice_get_main_vsi(pf);
		if (!main_vsi)
			return -EPERM;

		if (!main_vsi->all_numtc)
			return -EINVAL;

		/* figure out ADQ numtc */
		numtc = main_vsi->all_numtc - ICE_CHNL_START_TC;

		/* only one TC but still asking resources for channels,
		 * invalid config
		 */
		if (numtc < ICE_CHNL_START_TC)
			return -EPERM;

		g_val -= ICE_PF_VSI_GFLTR;
		/* channel VSIs get an equal share from the guaranteed pool */
		vsi->num_gfltr = g_val / numtc;

		/* each VSI gets same "best_effort" quota */
		vsi->num_bfltr = b_val;
	}

	return 0;
}

/**
 * ice_vsi_get_qs - Assign queues from PF to VSI
 * @vsi: the VSI to assign queues to
 *
 * Returns 0 on success and a negative value on error
 */
static int ice_vsi_get_qs(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_qs_cfg tx_qs_cfg = {
		.qs_mutex = &pf->avail_q_mutex,
		.pf_map = pf->avail_txqs,
		.pf_map_size = pf->max_pf_txqs,
		.q_count = vsi->alloc_txq,
		.scatter_count = ICE_MAX_SCATTER_TXQS,
		.vsi_map = vsi->txq_map,
		.vsi_map_offset = 0,
		.mapping_mode = ICE_VSI_MAP_CONTIG
	};
	struct ice_qs_cfg rx_qs_cfg = {
		.qs_mutex = &pf->avail_q_mutex,
		.pf_map = pf->avail_rxqs,
		.pf_map_size = pf->max_pf_rxqs,
		.q_count = vsi->alloc_rxq,
		.scatter_count = ICE_MAX_SCATTER_RXQS,
		.vsi_map = vsi->rxq_map,
		.vsi_map_offset = 0,
		.mapping_mode = ICE_VSI_MAP_CONTIG
	};
	int ret;

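	/* channel VSIs take their queues from the main PF VSI, so there is
	 * nothing to reserve from the PF-wide queue maps here
	 */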
	if (vsi->type == ICE_VSI_CHNL)
		return 0;

	ret = __ice_vsi_get_qs(&tx_qs_cfg);
	if (ret)
		return ret;
	vsi->tx_mapping_mode = tx_qs_cfg.mapping_mode;

	ret = __ice_vsi_get_qs(&rx_qs_cfg);
	if (ret)
		return ret;
	vsi->rx_mapping_mode = rx_qs_cfg.mapping_mode;

	return 0;
}

/**
 * ice_vsi_put_qs - Release queues from VSI to PF
 * @vsi: the VSI that is going to release queues
 */
static void ice_vsi_put_qs(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	int i;

	mutex_lock(&pf->avail_q_mutex);

	ice_for_each_alloc_txq(vsi, i) {
		clear_bit(vsi->txq_map[i], pf->avail_txqs);
		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
	}

	ice_for_each_alloc_rxq(vsi, i) {
		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
	}

	mutex_unlock(&pf->avail_q_mutex);
}

/**
 * ice_is_safe_mode - determine if driver is in safe mode
 * @pf: pointer to the PF struct
 *
 * returns true if driver is in safe mode, false otherwise
 */
bool ice_is_safe_mode(struct ice_pf *pf)
{
	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
}

/**
 * ice_is_rdma_ena - determine if RDMA is enabled
 * @pf: pointer to the PF struct
 *
 * returns true if RDMA is currently supported, false otherwise
 */
bool ice_is_rdma_ena(struct ice_pf *pf)
{
	return test_bit(ICE_FLAG_RDMA_ENA, pf->flags);
}

/**
 * ice_vsi_clean_rss_flow_fld - Delete RSS configuration
 * @vsi: the VSI being cleaned up
 *
 * This function deletes RSS input set for all flows that were configured
 * for this VSI
 */
static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	int status;

	if (ice_is_safe_mode(pf))
		return;

	status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
	if (status)
		dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n",
			vsi->vsi_num, status);
}

/**
 * ice_rss_clean - Delete RSS related VSI structures and configuration
 * @vsi: the VSI being removed
 */
static void ice_rss_clean(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;

	dev = ice_pf_to_dev(pf);

	if (vsi->rss_hkey_user)
		devm_kfree(dev, vsi->rss_hkey_user);
	if (vsi->rss_lut_user)
		devm_kfree(dev, vsi->rss_lut_user);

	ice_vsi_clean_rss_flow_fld(vsi);
	/* remove RSS replay list */
	if (!ice_is_safe_mode(pf))
		ice_rem_vsi_rss_list(&pf->hw, vsi->idx);
}

/**
 * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
 * @vsi: the VSI being configured
 */
static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
{
	struct ice_hw_common_caps *cap;
	struct ice_pf *pf = vsi->back;

	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
		vsi->rss_size = 1;
		return;
	}

	cap = &pf->hw.func_caps.common_cap;
	switch (vsi->type) {
	case ICE_VSI_CHNL:
	case ICE_VSI_PF:
		/* PF VSI will inherit RSS instance of PF */
		vsi->rss_table_size = (u16)cap->rss_table_size;
		if (vsi->type == ICE_VSI_CHNL)
			vsi->rss_size = min_t(u16, vsi->num_rxq,
					      BIT(cap->rss_table_entry_width));
		else
			vsi->rss_size = min_t(u16, num_online_cpus(),
					      BIT(cap->rss_table_entry_width));
		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
		break;
	case ICE_VSI_SWITCHDEV_CTRL:
		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
		vsi->rss_size = min_t(u16, num_online_cpus(),
				      BIT(cap->rss_table_entry_width));
		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
		break;
	case ICE_VSI_VF:
		/* VF VSI will get a small RSS table.
		 * For VSI_LUT, LUT size should be set to 64 bytes.
		 */
		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
		vsi->rss_size = ICE_MAX_RSS_QS_PER_VF;
		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
		break;
	case ICE_VSI_LB:
		break;
	default:
		dev_dbg(ice_pf_to_dev(pf), "Unsupported VSI type %s\n",
			ice_vsi_type_str(vsi->type));
		break;
	}
}

/**
 * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
 * @hw: HW structure used to determine the VLAN mode of the device
 * @ctxt: the VSI context being set
 *
 * This initializes a default VSI context for all sections except the Queues.
 */
static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt)
{
	u32 table = 0;

	memset(&ctxt->info, 0, sizeof(ctxt->info));
	/* VSI's should be allocated from shared pool */
	ctxt->alloc_from_pool = true;
	/* Src pruning enabled by default */
	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
	/* Traffic from VSI can be sent to LAN */
	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
	/* allow all untagged/tagged packets by default on Tx */
	ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
				  ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
				 ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
	/* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which
	 * results in legacy behavior (show VLAN, DEI, and UP) in descriptor.
	 *
	 * DVM - leave inner VLAN in packet by default
	 */
	if (ice_is_dvm_ena(hw)) {
		ctxt->info.inner_vlan_flags |=
			ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
		ctxt->info.outer_vlan_flags =
			(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
			 ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
			ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M;
		ctxt->info.outer_vlan_flags |=
			(ICE_AQ_VSI_OUTER_TAG_VLAN_8100 <<
			 ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
			ICE_AQ_VSI_OUTER_TAG_TYPE_M;
	}
	/* Have 1:1 UP mapping for both ingress/egress tables */
	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
	ctxt->info.ingress_table = cpu_to_le32(table);
	ctxt->info.egress_table = cpu_to_le32(table);
	/* Have 1:1 UP mapping for outer to inner UP table */
	ctxt->info.outer_up_table = cpu_to_le32(table);
	/* No outer tag support; outer_tag_flags remains zero */
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ice_vsi_setup_q_map - Setup a VSI queue map
|
|
|
|
* @vsi: the VSI being configured
|
|
|
|
* @ctxt: VSI context structure
|
|
|
|
*/
|
|
|
|
static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
|
|
|
|
{
|
2021-03-03 02:15:33 +08:00
|
|
|
u16 offset = 0, qmap = 0, tx_count = 0, pow = 0;
|
|
|
|
u16 num_txq_per_tc, num_rxq_per_tc;
|
2018-09-20 08:23:07 +08:00
|
|
|
u16 qcount_tx = vsi->alloc_txq;
|
|
|
|
u16 qcount_rx = vsi->alloc_rxq;
|
2018-10-27 02:44:35 +08:00
|
|
|
u8 netdev_tc = 0;
|
2018-09-20 08:23:07 +08:00
|
|
|
int i;
|
|
|
|
|
2021-10-16 07:35:15 +08:00
|
|
|
if (!vsi->tc_cfg.numtc) {
|
|
|
|
/* at least TC0 should be enabled by default */
|
|
|
|
vsi->tc_cfg.numtc = 1;
|
|
|
|
vsi->tc_cfg.ena_tc = 1;
|
2018-09-20 08:23:07 +08:00
|
|
|
}
|
|
|
|
|
2021-03-03 02:15:33 +08:00
|
|
|
num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
|
|
|
|
if (!num_rxq_per_tc)
|
|
|
|
num_rxq_per_tc = 1;
|
|
|
|
num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc;
|
|
|
|
if (!num_txq_per_tc)
|
|
|
|
num_txq_per_tc = 1;
|
|
|
|
|
|
|
|
/* find the (rounded up) power-of-2 of qcount */
|
|
|
|
pow = (u16)order_base_2(num_rxq_per_tc);
|
2018-09-20 08:23:07 +08:00
|
|
|
|
|
|
|
/* TC mapping is a function of the number of Rx queues assigned to the
|
|
|
|
* VSI for each traffic class and the offset of these queues.
|
|
|
|
* The first 10 bits are for queue offset for TC0, next 4 bits for no:of
|
|
|
|
* queues allocated to TC0. No:of queues is a power-of-2.
|
|
|
|
*
|
|
|
|
* If TC is not enabled, the queue offset is set to 0, and allocate one
|
|
|
|
* queue, this way, traffic for the given TC will be sent to the default
|
|
|
|
* queue.
|
|
|
|
*
|
|
|
|
* Setup number and offset of Rx queues for all TCs for the VSI
|
|
|
|
*/
|
2019-02-14 02:51:10 +08:00
|
|
|
ice_for_each_traffic_class(i) {
|
2018-09-20 08:23:07 +08:00
|
|
|
if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
|
|
|
|
/* TC is not enabled */
|
|
|
|
vsi->tc_cfg.tc_info[i].qoffset = 0;
|
2018-10-27 02:44:35 +08:00
|
|
|
vsi->tc_cfg.tc_info[i].qcount_rx = 1;
|
|
|
|
vsi->tc_cfg.tc_info[i].qcount_tx = 1;
|
|
|
|
vsi->tc_cfg.tc_info[i].netdev_tc = 0;
|
2018-09-20 08:23:07 +08:00
|
|
|
ctxt->info.tc_mapping[i] = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* TC is enabled */
|
|
|
|
vsi->tc_cfg.tc_info[i].qoffset = offset;
|
2021-03-03 02:15:33 +08:00
|
|
|
vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc;
|
|
|
|
vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc;
|
2018-10-27 02:44:35 +08:00
|
|
|
vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
|
2018-09-20 08:23:07 +08:00
|
|
|
|
|
|
|
qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
|
|
|
|
ICE_AQ_VSI_TC_Q_OFFSET_M) |
|
|
|
|
((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
|
|
|
|
ICE_AQ_VSI_TC_Q_NUM_M);
|
2021-03-03 02:15:33 +08:00
|
|
|
offset += num_rxq_per_tc;
|
|
|
|
tx_count += num_txq_per_tc;
|
2018-09-20 08:23:07 +08:00
|
|
|
ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
|
|
|
|
}

	/* if offset is non-zero, it was calculated correctly based on the
	 * enabled TCs for this VSI; otherwise qcount_rx will always be
	 * correct and non-zero because it is based on the VSI's allocated
	 * Rx queues, which is at least 1 (hence qcount_tx will also be at
	 * least 1)
	 */
	if (offset)
		vsi->num_rxq = offset;
	else
		vsi->num_rxq = num_rxq_per_tc;

	vsi->num_txq = tx_count;

	if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
		dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
		/* since there is a chance that num_rxq could have been changed
		 * in the above for loop, make num_txq equal to num_rxq.
		 */
		vsi->num_txq = vsi->num_rxq;
	}

	/* Rx queue mapping */
	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
	/* q_mapping buffer holds the info for the first queue allocated for
	 * this VSI in the PF space and also the number of queues associated
	 * with this VSI.
	 */
	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
}

/**
 * ice_set_fd_vsi_ctx - Set FD VSI context before adding a VSI
 * @ctxt: the VSI context being set
 * @vsi: the VSI being configured
 */
static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
{
	u8 dflt_q_group, dflt_q_prio;
	u16 dflt_q, report_q, val;

	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
	    vsi->type != ICE_VSI_VF && vsi->type != ICE_VSI_CHNL)
		return;

	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
	ctxt->info.valid_sections |= cpu_to_le16(val);
	dflt_q = 0;
	dflt_q_group = 0;
	report_q = 0;
	dflt_q_prio = 0;

	/* enable flow director filtering/programming */
	val = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE;
	ctxt->info.fd_options = cpu_to_le16(val);
	/* max of allocated flow director filters */
	ctxt->info.max_fd_fltr_dedicated =
			cpu_to_le16(vsi->num_gfltr);
	/* max of shared flow director filters any VSI may program */
	ctxt->info.max_fd_fltr_shared =
			cpu_to_le16(vsi->num_bfltr);
	/* default queue index within the VSI of the default FD */
	val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) &
	       ICE_AQ_VSI_FD_DEF_Q_M);
	/* target queue or queue group to the FD filter */
	val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) &
		ICE_AQ_VSI_FD_DEF_GRP_M);
	ctxt->info.fd_def_q = cpu_to_le16(val);
	/* queue index on which FD filter completion is reported */
	val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) &
	       ICE_AQ_VSI_FD_REPORT_Q_M);
	/* priority of the default qindex action */
	val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) &
		ICE_AQ_VSI_FD_DEF_PRIORITY_M);
	ctxt->info.fd_report_opt = cpu_to_le16(val);
}

/**
 * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
 * @ctxt: the VSI context being set
 * @vsi: the VSI being configured
 */
static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
{
	u8 lut_type, hash_type;
	struct device *dev;
	struct ice_pf *pf;

	pf = vsi->back;
	dev = ice_pf_to_dev(pf);

	switch (vsi->type) {
	case ICE_VSI_CHNL:
	case ICE_VSI_PF:
		/* PF VSI will inherit RSS instance of PF */
		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
		break;
	case ICE_VSI_VF:
		/* VF VSI will get a small RSS table, which is a VSI LUT type */
		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
		break;
	default:
		dev_dbg(dev, "Unsupported VSI type %s\n",
			ice_vsi_type_str(vsi->type));
		return;
	}
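
	/* Pack the LUT type and hash type into adjacent fields of q_opt_rss:
	 * each value is shifted into place and masked to its own bit field,
	 * then the two fields are ORed together (e.g., PF LUT + Toeplitz
	 * hashing for the PF VSI case above).
	 */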
	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
			       ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
				ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
}

static void
ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
	struct ice_pf *pf = vsi->back;
	u16 qcount, qmap;
	u8 offset = 0;
	int pow;

	qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix);
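	/* A channel VSI cannot use more Rx queues than the PF has LAN MSI-X
	 * vectors to service them. Illustration (hypothetical values): with
	 * num_rxq = 8 but num_lan_msix = 4, qcount is capped at 4 and
	 * pow = order_base_2(4) = 2, i.e. 2^2 queues in the map below.
	 */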

	pow = order_base_2(qcount);
	qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
		ICE_AQ_VSI_TC_Q_OFFSET_M) |
	       ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
		ICE_AQ_VSI_TC_Q_NUM_M);

	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q);
	ctxt->info.q_mapping[1] = cpu_to_le16(qcount);
}

/**
 * ice_vsi_init - Create and initialize a VSI
 * @vsi: the VSI being configured
 * @init_vsi: is this call creating a VSI
 *
 * This initializes a VSI context depending on the VSI type to be added and
 * passes it down to the add_vsi aq command to create a new VSI.
 */
static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	struct ice_vsi_ctx *ctxt;
	struct device *dev;
	int ret = 0;

	dev = ice_pf_to_dev(pf);
	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (!ctxt)
		return -ENOMEM;

	switch (vsi->type) {
	case ICE_VSI_CTRL:
	case ICE_VSI_LB:
	case ICE_VSI_PF:
		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
		break;
	case ICE_VSI_SWITCHDEV_CTRL:
	case ICE_VSI_CHNL:
		ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2;
		break;
	case ICE_VSI_VF:
		ctxt->flags = ICE_AQ_VSI_TYPE_VF;
		/* VF number here is the absolute VF number (0-255) */
		ctxt->vf_num = vsi->vf->vf_id + hw->func_caps.vf_base_id;
		break;
	default:
		ret = -ENODEV;
		goto out;
	}

	/* Handle VLAN pruning for channel VSI if main VSI has VLAN
	 * prune enabled
	 */
	if (vsi->type == ICE_VSI_CHNL) {
		struct ice_vsi *main_vsi;

		main_vsi = ice_get_main_vsi(pf);
		if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi))
			ctxt->info.sw_flags2 |=
				ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
		else
			ctxt->info.sw_flags2 &=
				~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
	}

	ice_set_dflt_vsi_ctx(hw, ctxt);
	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
		ice_set_fd_vsi_ctx(ctxt, vsi);
	/* if the switch is in VEB mode, allow VSI loopback */
	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;

	/* Set LUT type and HASH type if RSS is enabled */
	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags) &&
	    vsi->type != ICE_VSI_CTRL) {
		ice_set_rss_vsi_ctx(ctxt, vsi);
		/* if updating the VSI context, make sure to set
		 * valid_sections to indicate which section of the VSI
		 * context is being updated
		 */
		if (!init_vsi)
			ctxt->info.valid_sections |=
				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
	}

	ctxt->info.sw_id = vsi->port_info->sw_id;
	if (vsi->type == ICE_VSI_CHNL) {
		ice_chnl_vsi_setup_q_map(vsi, ctxt);
	} else {
		ice_vsi_setup_q_map(vsi, ctxt);
		if (!init_vsi) /* means the VSI is being updated */
			/* must indicate which sections of the VSI context
			 * are being modified
			 */
			ctxt->info.valid_sections |=
				cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
	}

	/* Allow control frames out of main VSI */
	if (vsi->type == ICE_VSI_PF) {
		ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
		ctxt->info.valid_sections |=
			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
	}

	if (init_vsi) {
		ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
		if (ret) {
			dev_err(dev, "Add VSI failed, err %d\n", ret);
			ret = -EIO;
			goto out;
		}
	} else {
		ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
		if (ret) {
			dev_err(dev, "Update VSI failed, err %d\n", ret);
			ret = -EIO;
			goto out;
		}
	}

	/* keep context for update VSI operations */
	vsi->info = ctxt->info;

	/* record VSI number returned */
	vsi->vsi_num = ctxt->vsi_num;

out:
	kfree(ctxt);
	return ret;
}

/**
 * ice_free_res - free a block of resources
 * @res: pointer to the resource
 * @index: starting index previously returned by ice_get_res
 * @id: identifier to track owner
 *
 * Returns number of resources freed
 */
int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
{
	int count = 0;
	int i;

	if (!res || index >= res->end)
		return -EINVAL;

	id |= ICE_RES_VALID_BIT;
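	/* Walk forward from index, clearing entries only while they still
	 * match this owner's tagged ID; the walk stops at the first entry
	 * owned by someone else, so only the caller's contiguous block is
	 * returned to the free pool.
	 */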
	for (i = index; i < res->end && res->list[i] == id; i++) {
		res->list[i] = 0;
		count++;
	}

	return count;
}

/**
 * ice_search_res - Search the tracker for a block of resources
 * @res: pointer to the resource
 * @needed: size of the block needed
 * @id: identifier to track owner
 *
 * Returns the base item index of the block, or -ENOMEM for error
 */
static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
{
	u16 start = 0, end = 0;

	if (needed > res->end)
		return -ENOMEM;

	id |= ICE_RES_VALID_BIT;
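	/* First-fit linear scan: "start" marks the beginning of the current
	 * candidate run of free entries and "end" advances one entry per
	 * iteration. Illustration (hypothetical tracker state): for
	 * list = [used, used, free, free, free] and needed = 3, "start" is
	 * bumped to 2 by the used entries and the function returns 2.
	 */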

	do {
		/* skip already allocated entries */
		if (res->list[end++] & ICE_RES_VALID_BIT) {
			start = end;
			if ((start + needed) > res->end)
				break;
		}

		if (end == (start + needed)) {
			int i = start;

			/* there was enough, so assign it to the requestor */
			while (i != end)
				res->list[i++] = id;

			return start;
		}
	} while (end < res->end);

	return -ENOMEM;
}

/**
 * ice_get_free_res_count - Get free count from a resource tracker
 * @res: Resource tracker instance
 */
static u16 ice_get_free_res_count(struct ice_res_tracker *res)
{
	u16 i, count = 0;

	for (i = 0; i < res->end; i++)
		if (!(res->list[i] & ICE_RES_VALID_BIT))
			count++;

	return count;
}

/**
 * ice_get_res - get a block of resources
 * @pf: board private structure
 * @res: pointer to the resource
 * @needed: size of the block needed
 * @id: identifier to track owner
 *
 * Returns the base item index of the block, or negative for error
 */
int
ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
{
	if (!res || !pf)
		return -EINVAL;

	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
		dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n",
			needed, res->num_entries, id);
		return -EINVAL;
	}

	return ice_search_res(res, needed, id);
}

/**
 * ice_get_vf_ctrl_res - Get VF control VSI resource
 * @pf: pointer to the PF structure
 * @vsi: the VSI to allocate a resource for
 *
 * Look up whether another VF has already allocated the control VSI resource.
 * If so, re-use this resource so that we share it among all VFs.
 *
 * Otherwise, allocate the resource and return it.
 */
static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
{
	struct ice_vf *vf;
	unsigned int bkt;
	int base;

	rcu_read_lock();
	ice_for_each_vf_rcu(pf, bkt, vf) {
		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
			base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
			rcu_read_unlock();
			return base;
		}
	}
	rcu_read_unlock();
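
	/* No other VF currently owns a control VSI, so carve out a fresh
	 * block of vectors from the IRQ tracker; later VFs will find and
	 * share this allocation via the loop above.
	 */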
	return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors,
			   ICE_RES_VF_CTRL_VEC_ID);
}

/**
 * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
 * @vsi: ptr to the VSI
 *
 * This should only be called after ice_vsi_alloc() which allocates the
 * corresponding SW VSI structure and initializes num_queue_pairs for the
 * newly allocated VSI.
 *
 * Returns 0 on success or negative on failure
 */
static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	u16 num_q_vectors;
	int base;

	dev = ice_pf_to_dev(pf);
	/* SRIOV doesn't grab irq_tracker entries for each VSI */
	if (vsi->type == ICE_VSI_VF)
		return 0;
	if (vsi->type == ICE_VSI_CHNL)
		return 0;

	if (vsi->base_vector) {
		dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
			vsi->vsi_num, vsi->base_vector);
		return -EEXIST;
	}

	num_q_vectors = vsi->num_q_vectors;
	/* reserve slots from OS requested IRQs */
	if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
		base = ice_get_vf_ctrl_res(pf, vsi);
	} else {
		base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
				   vsi->idx);
	}

	if (base < 0) {
		dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
			ice_get_free_res_count(pf->irq_tracker),
			ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors);
		return -ENOENT;
	}
	vsi->base_vector = (u16)base;
	pf->num_avail_sw_msix -= num_q_vectors;
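	/* base_vector is the first of num_q_vectors contiguous entries
	 * reserved in the IRQ tracker; the PF-wide count of available
	 * software MSI-X vectors shrinks by the same amount.
	 */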

	return 0;
}

/**
 * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
 * @vsi: the VSI having rings deallocated
 */
static void ice_vsi_clear_rings(struct ice_vsi *vsi)
{
	int i;

	/* Avoid stale references by clearing map from vector to ring */
	if (vsi->q_vectors) {
		ice_for_each_q_vector(vsi, i) {
			struct ice_q_vector *q_vector = vsi->q_vectors[i];

			if (q_vector) {
				q_vector->tx.tx_ring = NULL;
				q_vector->rx.rx_ring = NULL;
			}
		}
	}

	if (vsi->tx_rings) {
		ice_for_each_alloc_txq(vsi, i) {
			if (vsi->tx_rings[i]) {
				kfree_rcu(vsi->tx_rings[i], rcu);
				WRITE_ONCE(vsi->tx_rings[i], NULL);
			}
		}
	}
	if (vsi->rx_rings) {
		ice_for_each_alloc_rxq(vsi, i) {
			if (vsi->rx_rings[i]) {
				kfree_rcu(vsi->rx_rings[i], rcu);
				WRITE_ONCE(vsi->rx_rings[i], NULL);
			}
		}
	}
}

/**
 * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
 * @vsi: VSI which is having rings allocated
 */
static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
{
	bool dvm_ena = ice_is_dvm_ena(&vsi->back->hw);
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	u16 i;

	dev = ice_pf_to_dev(pf);
	/* Allocate Tx rings */
	ice_for_each_alloc_txq(vsi, i) {
		struct ice_tx_ring *ring;

		/* allocate with kzalloc(), free with kfree_rcu() */
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);

		if (!ring)
			goto err_out;

		ring->q_index = i;
		ring->reg_idx = vsi->txq_map[i];
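		/* q_index is the ring's position within this VSI, while
		 * reg_idx comes from txq_map[] and addresses the queue in
		 * the PF's absolute (device-wide) queue space.
		 */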
		ring->vsi = vsi;
		ring->tx_tstamps = &pf->ptp.port.tx;
		ring->dev = dev;
		ring->count = vsi->num_tx_desc;
|
ice: Add hot path support for 802.1Q and 802.1ad VLAN offloads
Currently the driver only supports 802.1Q VLAN insertion and stripping.
However, once Double VLAN Mode (DVM) is fully supported, then both 802.1Q
and 802.1ad VLAN insertion and stripping will be supported. Unfortunately
the VSI context parameters only allow for one VLAN ethertype at a time
for VLAN offloads so only one or the other VLAN ethertype offload can be
supported at once.
To support this, multiple changes are needed.
Rx path changes:
[1] In DVM, the Rx queue context l2tagsel field needs to be cleared so
the outermost tag shows up in the l2tag2_2nd field of the Rx flex
descriptor. In Single VLAN Mode (SVM), the l2tagsel field should remain
1 to support SVM configurations.
[2] Modify the ice_test_staterr() function to take a __le16 instead of
the ice_32b_rx_flex_desc union pointer so this function can be used for
both rx_desc->wb.status_error0 and rx_desc->wb.status_error1.
[3] Add the new inline function ice_get_vlan_tag_from_rx_desc() that
checks if there is a VLAN tag in l2tag1 or l2tag2_2nd.
[4] In ice_receive_skb(), add a check to see if NETIF_F_HW_VLAN_STAG_RX
is enabled in netdev->features. If it is, then this is the VLAN
ethertype that needs to be added to the stripping VLAN tag. Since
ice_fix_features() prevents CTAG_RX and STAG_RX from being enabled
simultaneously, the VLAN ethertype will only ever be 802.1Q or 802.1ad.
Tx path changes:
[1] In DVM, the VLAN tag needs to be placed in the l2tag2 field of the Tx
context descriptor. The new define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN was
added to the list of tx_flags to handle this case.
[2] When the stack requests the VLAN tag to be offloaded on Tx, the
driver needs to set either ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN or
ICE_TX_FLAGS_HW_VLAN, so the tag is inserted in l2tag2 or l2tag1
respectively. To determine which location to use, set a bit in the Tx
ring flags field during ring allocation that can be used to determine
which field to use in the Tx descriptor. In DVM, always use l2tag2,
and in SVM, always use l2tag1.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2021-12-03 00:38:47 +08:00
|
|
|
if (dvm_ena)
|
|
|
|
ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2;
|
|
|
|
else
|
|
|
|
ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1;
|
2020-06-09 21:19:45 +08:00
|
|
|
WRITE_ONCE(vsi->tx_rings[i], ring);
|
2018-09-20 08:23:07 +08:00
|
|
|
}
|
|
|
|
|
2018-10-27 02:44:47 +08:00
|
|
|
/* Allocate Rx rings */
|
2021-08-19 20:00:04 +08:00
|
|
|
ice_for_each_alloc_rxq(vsi, i) {
|
2021-08-19 19:59:58 +08:00
|
|
|
struct ice_rx_ring *ring;
|
2018-09-20 08:23:07 +08:00
|
|
|
|
|
|
|
/* allocate with kzalloc(), free with kfree_rcu() */
|
|
|
|
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
|
|
|
|
if (!ring)
|
|
|
|
goto err_out;
|
|
|
|
|
|
|
|
ring->q_index = i;
|
|
|
|
ring->reg_idx = vsi->rxq_map[i];
|
|
|
|
ring->vsi = vsi;
|
|
|
|
ring->netdev = vsi->netdev;
|
2019-11-08 22:23:26 +08:00
|
|
|
ring->dev = dev;
|
2019-02-09 04:50:59 +08:00
|
|
|
ring->count = vsi->num_rx_desc;
|
2020-06-09 21:19:45 +08:00
|
|
|
WRITE_ONCE(vsi->rx_rings[i], ring);
|
2018-09-20 08:23:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_out:
|
|
|
|
ice_vsi_clear_rings(vsi);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
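
/* Illustrative note (not driver code): rings are published with WRITE_ONCE()
 * and, per the comments above, are meant to be freed with kfree_rcu(), so a
 * reader dereferencing vsi->tx_rings[i] under RCU never observes a
 * half-initialized ring. For a VSI-relative queue i the mapping set up here
 * is q_index = i (VSI space) and reg_idx = vsi->txq_map[i] (absolute PF
 * queue number).
 */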

/**
 * ice_vsi_manage_rss_lut - disable/enable RSS
 * @vsi: the VSI being changed
 * @ena: boolean value indicating if this is an enable or disable request
 *
 * In the event of disable request for RSS, this function will zero out RSS
 * LUT, while in the event of enable request for RSS, it will reconfigure RSS
 * LUT.
 */
void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
{
	u8 *lut;

	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!lut)
		return;

	if (ena) {
		if (vsi->rss_lut_user)
			memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
		else
			ice_fill_rss_lut(lut, vsi->rss_table_size,
					 vsi->rss_size);
	}

	ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
	kfree(lut);
}
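
/* Hedged usage sketch: with @ena false the LUT written out stays all-zero,
 * so every RSS hash lands on queue 0 until RSS is re-enabled, e.g.:
 *
 *	ice_vsi_manage_rss_lut(vsi, false);	// zeroed LUT: queue 0 only
 *	ice_vsi_manage_rss_lut(vsi, true);	// user LUT or default spread
 */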

/**
 * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
 * @vsi: VSI to be configured
 */
int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	u8 *lut, *key;
	int err;

	dev = ice_pf_to_dev(pf);
	if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size &&
	    (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) {
		vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size);
	} else {
		vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);

		/* If orig_rss_size is valid and it is less than determined
		 * main VSI's rss_size, update main VSI's rss_size to be
		 * orig_rss_size so that when tc-qdisc is deleted, main VSI
		 * RSS table gets programmed to be correct (whatever it was
		 * to begin with, prior to setup-tc for ADQ config)
		 */
		if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size &&
		    vsi->orig_rss_size <= vsi->num_rxq) {
			vsi->rss_size = vsi->orig_rss_size;
			/* now orig_rss_size is used, reset it to zero */
			vsi->orig_rss_size = 0;
		}
	}

	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!lut)
		return -ENOMEM;

	if (vsi->rss_lut_user)
		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
	else
		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);

	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
	if (err) {
		dev_err(dev, "set_rss_lut failed, error %d\n", err);
		goto ice_vsi_cfg_rss_exit;
	}

	key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL);
	if (!key) {
		err = -ENOMEM;
		goto ice_vsi_cfg_rss_exit;
	}

	if (vsi->rss_hkey_user)
		memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
	else
		netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);

	err = ice_set_rss_key(vsi, key);
	if (err)
		dev_err(dev, "set_rss_key failed, error %d\n", err);

	kfree(key);
ice_vsi_cfg_rss_exit:
	kfree(lut);
	return err;
}
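
/* Worked example, assuming ice_fill_rss_lut() spreads entries round-robin
 * (consistent with its use above): with rss_table_size = 128 and
 * rss_size = 4 the default LUT becomes 0, 1, 2, 3, 0, 1, 2, 3, ... so hash
 * results stripe evenly across the four Rx queues.
 */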

/**
 * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows
 * @vsi: VSI to be configured
 *
 * This function will only be called during the VF VSI setup. Upon successful
 * completion of package download, this function will configure default RSS
 * input sets for VF VSI.
 */
static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	int status;

	dev = ice_pf_to_dev(pf);
	if (ice_is_safe_mode(pf)) {
		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
			vsi->vsi_num);
		return;
	}

	status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA);
	if (status)
		dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n",
			vsi->vsi_num, status);
}

/**
 * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows
 * @vsi: VSI to be configured
 *
 * This function will only be called after successful download package call
 * during initialization of PF. Since the downloaded package will erase the
 * RSS section, this function will configure RSS input sets for different
 * flow types. The last profile added has the highest priority, therefore 2
 * tuple profiles (i.e. IPv4 src/dst) are added before 4 tuple profiles
 * (i.e. IPv4 src/dst TCP src/dst port).
 */
static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
{
	u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	struct device *dev;
	int status;

	dev = ice_pf_to_dev(pf);
	if (ice_is_safe_mode(pf)) {
		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
			vsi_num);
		return;
	}
	/* configure RSS for IPv4 with input set IP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
				 ICE_FLOW_SEG_HDR_IPV4);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for IPv6 with input set IPv6 src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
				 ICE_FLOW_SEG_HDR_IPV6);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for udp4 with input set IP src/dst, UDP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4,
				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for sctp4 with input set IP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6,
				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6,
				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	/* configure RSS for sctp6 with input set IPv6 src/dst */
	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n",
			vsi_num, status);

	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_ESP_SPI,
				 ICE_FLOW_SEG_HDR_ESP);
	if (status)
		dev_dbg(dev, "ice_add_rss_cfg failed for esp/spi flow, vsi = %d, error = %d\n",
			vsi_num, status);
}
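
/* Priority illustration, following the kernel-doc above: because the last
 * profile added wins, the 2-tuple IPv4 profile is overridden for TCP
 * traffic by the later 4-tuple tcp4 profile, so a TCP flow hashes on
 * {IP src, IP dst, TCP sport, TCP dport} while other IPv4 traffic still
 * hashes on {IP src, IP dst}.
 */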

/**
 * ice_pf_state_is_nominal - checks the PF for nominal state
 * @pf: pointer to PF to check
 *
 * Check the PF's state for a collection of bits that would indicate
 * the PF is in a state that would inhibit normal operation for
 * driver functionality.
 *
 * Returns true if PF is in a nominal state, false otherwise
 */
bool ice_pf_state_is_nominal(struct ice_pf *pf)
{
	DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 };

	if (!pf)
		return false;

	bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS);
	if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS))
		return false;

	return true;
}
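
/* Hypothetical caller-side sketch: gate a reconfiguration path on the PF
 * being in a nominal state:
 *
 *	if (!ice_pf_state_is_nominal(pf))
 *		return -EBUSY;
 */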

/**
 * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
 * @vsi: the VSI to be updated
 */
void ice_update_eth_stats(struct ice_vsi *vsi)
{
	struct ice_eth_stats *prev_es, *cur_es;
	struct ice_hw *hw = &vsi->back->hw;
	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */

	prev_es = &vsi->eth_stats_prev;
	cur_es = &vsi->eth_stats;

	ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->rx_bytes, &cur_es->rx_bytes);

	ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->rx_unicast, &cur_es->rx_unicast);

	ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->rx_multicast, &cur_es->rx_multicast);

	ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->rx_broadcast, &cur_es->rx_broadcast);

	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->rx_discards, &cur_es->rx_discards);

	ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->tx_bytes, &cur_es->tx_bytes);

	ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->tx_unicast, &cur_es->tx_unicast);

	ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->tx_multicast, &cur_es->tx_multicast);

	ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->tx_broadcast, &cur_es->tx_broadcast);

	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
			  &prev_es->tx_errors, &cur_es->tx_errors);

	vsi->stat_offsets_loaded = true;
}
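
/* Hedged sketch of the offset scheme assumed above: the hardware counters
 * are free-running and never cleared, so on the first call (when
 * stat_offsets_loaded is false) each reading is latched as the baseline in
 * *prev, and later calls report cur = reading - baseline, with rollover
 * handling for the 40-bit registers.
 */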

/**
 * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
 * @vsi: VSI
 */
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
{
	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
		vsi->rx_buf_len = ICE_RXBUF_2048;
#if (PAGE_SIZE < 8192)
	} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
		vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
		vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
#endif
	} else {
		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
#if (PAGE_SIZE < 8192)
		vsi->rx_buf_len = ICE_RXBUF_3072;
#else
		vsi->rx_buf_len = ICE_RXBUF_2048;
#endif
	}
}
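
/* Worked example of the branches above, assuming PAGE_SIZE == 4096 and
 * legacy-rx disabled: an MTU of 1500 (<= ETH_DATA_LEN) selects
 * 1536 - NET_IP_ALIGN byte Rx buffers, while an MTU of 9000 falls through
 * to the else branch and selects 3072-byte buffers with max_frame set to
 * the firmware maximum.
 */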

/**
 * ice_write_qrxflxp_cntxt - write/configure QRXFLXP_CNTXT register
 * @hw: HW pointer
 * @pf_q: index of the Rx queue in the PF's queue space
 * @rxdid: flexible descriptor RXDID
 * @prio: priority for the RXDID for this queue
 * @ena_ts: true to enable timestamp and false to disable timestamp
 */
void
ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
			bool ena_ts)
{
	int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));

	/* clear any previous values */
	regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
		    QRXFLXP_CNTXT_RXDID_PRIO_M |
		    QRXFLXP_CNTXT_TS_M);

	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
		QRXFLXP_CNTXT_RXDID_IDX_M;

	regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
		QRXFLXP_CNTXT_RXDID_PRIO_M;

	if (ena_ts)
		/* Enable TimeSync on this queue */
		regval |= QRXFLXP_CNTXT_TS_M;

	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
}
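
/* Illustrative call (parameter values hypothetical): program PF queue 10 to
 * use flexible descriptor profile 2 at priority 3 with Rx timestamps on:
 *
 *	ice_write_qrxflxp_cntxt(hw, 10, 2, 3, true);
 *
 * The read-modify-write above only touches the RXDID index, priority and
 * timestamp fields; all other QRXFLXP_CNTXT bits are preserved.
 */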

/**
 * ice_vsi_cfg_single_rxq - configure a single Rx queue of a VSI
 * @vsi: the VSI that the queue belongs to
 * @q_idx: VSI-relative index of the Rx queue
 */
int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
{
	if (q_idx >= vsi->num_rxq)
		return -EINVAL;

	return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
}

/**
 * ice_vsi_cfg_single_txq - configure a single Tx queue of a VSI
 * @vsi: the VSI that the queue belongs to
 * @tx_rings: Tx ring array of the VSI
 * @q_idx: VSI-relative index of the Tx queue
 */
int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx)
{
	struct ice_aqc_add_tx_qgrp *qg_buf;
	int err;

	if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
		return -EINVAL;

	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
	if (!qg_buf)
		return -ENOMEM;

	qg_buf->num_txqs = 1;

	err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
	kfree(qg_buf);
	return err;
}

/**
 * ice_vsi_cfg_rxqs - Configure the VSI for Rx
 * @vsi: the VSI being configured
 *
 * Return 0 on success and a negative value on error
 * Configure the Rx VSI for operation.
 */
int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
{
	u16 i;

	if (vsi->type == ICE_VSI_VF)
		goto setup_rings;

	ice_vsi_cfg_frame_size(vsi);
setup_rings:
	/* set up individual rings */
	ice_for_each_rxq(vsi, i) {
		int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);

		if (err)
			return err;
	}

	return 0;
}

/**
 * ice_vsi_cfg_txqs - Configure the VSI for Tx
 * @vsi: the VSI being configured
 * @rings: Tx ring array to be configured
 * @count: number of Tx ring array elements
 *
 * Return 0 on success and a negative value on error
 * Configure the Tx VSI for operation.
 */
static int
ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count)
{
	struct ice_aqc_add_tx_qgrp *qg_buf;
	u16 q_idx = 0;
	int err = 0;

	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
	if (!qg_buf)
		return -ENOMEM;

	qg_buf->num_txqs = 1;

	for (q_idx = 0; q_idx < count; q_idx++) {
		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
		if (err)
			goto err_cfg_txqs;
	}

err_cfg_txqs:
	kfree(qg_buf);
	return err;
}

/**
 * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
 * @vsi: the VSI being configured
 *
 * Return 0 on success and a negative value on error
 * Configure the Tx VSI for operation.
 */
int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
{
	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
}

/**
 * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
 * @vsi: the VSI being configured
 *
 * Return 0 on success and a negative value on error
 * Configure the Tx queues dedicated for XDP in given VSI for operation.
 */
int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
{
	int ret;
	int i;

	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
	if (ret)
		return ret;

	ice_for_each_xdp_txq(vsi, i)
		vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]);

	return ret;
}

/**
 * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
 * @intrl: interrupt rate limit in usecs
 * @gran: interrupt rate limit granularity in usecs
 *
 * This function converts a decimal interrupt rate limit in usecs to the format
 * expected by firmware.
 */
static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
{
	u32 val = intrl / gran;

	if (val)
		return val | GLINT_RATE_INTRL_ENA_M;
	return 0;
}
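
/* Arithmetic example (granularity value hypothetical): with gran = 2 and
 * intrl = 8 usecs, val = 8 / 2 = 4 and the register value returned is
 * 4 | GLINT_RATE_INTRL_ENA_M; any intrl smaller than gran yields 0, which
 * leaves rate limiting disabled.
 */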

/**
 * ice_write_intrl - write throttle rate limit to interrupt specific register
 * @q_vector: pointer to interrupt specific structure
 * @intrl: throttle rate limit in microseconds to write
 */
void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl)
{
	struct ice_hw *hw = &q_vector->vsi->back->hw;

	wr32(hw, GLINT_RATE(q_vector->reg_idx),
	     ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25));
}

/**
 * ice_pull_qvec_from_rc - get the q_vector that owns a ring container
 * @rc: the ring container to resolve
 */
static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc)
{
	switch (rc->type) {
	case ICE_RX_CONTAINER:
		if (rc->rx_ring)
			return rc->rx_ring->q_vector;
		break;
	case ICE_TX_CONTAINER:
		if (rc->tx_ring)
			return rc->tx_ring->q_vector;
		break;
	default:
		break;
	}

	return NULL;
}

/**
 * __ice_write_itr - write throttle rate to register
 * @q_vector: pointer to interrupt data structure
 * @rc: pointer to ring container
 * @itr: throttle rate in microseconds to write
 */
static void __ice_write_itr(struct ice_q_vector *q_vector,
			    struct ice_ring_container *rc, u16 itr)
{
	struct ice_hw *hw = &q_vector->vsi->back->hw;

	wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
	     ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S);
}

/**
 * ice_write_itr - write throttle rate to queue specific register
 * @rc: pointer to ring container
 * @itr: throttle rate in microseconds to write
 */
void ice_write_itr(struct ice_ring_container *rc, u16 itr)
{
	struct ice_q_vector *q_vector;

	q_vector = ice_pull_qvec_from_rc(rc);
	if (!q_vector)
		return;

	__ice_write_itr(q_vector, rc, itr);
}
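
/* Hypothetical usage sketch: since ice_write_itr() resolves the owning
 * q_vector from the container itself, callers only need the container
 * (ICE_ITR_20K is assumed here as an example throttle value):
 *
 *	ice_write_itr(&q_vector->rx, ICE_ITR_20K);
 */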

/**
 * ice_set_q_vector_intrl - set up interrupt rate limiting
 * @q_vector: the vector to be configured
 *
 * Interrupt rate limiting is local to the vector, not per-queue so we must
 * detect if either ring container has dynamic moderation enabled to decide
 * what to set the interrupt rate limit to via INTRL settings. In the case that
 * dynamic moderation is disabled on both, write the value with the cached
 * setting to make sure INTRL register matches the user visible value.
 */
void ice_set_q_vector_intrl(struct ice_q_vector *q_vector)
{
	if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) {
		/* in the case of dynamic enabled, cap each vector to no more
		 * than (4 us) 250,000 ints/sec, which allows low latency
		 * but still less than 500,000 interrupts per second, which
		 * reduces CPU a bit in the case of the lowest latency
		 * setting. The 4 here is a value in microseconds.
		 */
		ice_write_intrl(q_vector, 4);
	} else {
		ice_write_intrl(q_vector, q_vector->intrl);
	}
}
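
/* Check of the cap described above: a 4 usec rate limit allows at most
 * 1,000,000 / 4 = 250,000 interrupts per second per vector.
 */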

/**
 * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
 * @vsi: the VSI being configured
 *
 * This configures MSIX mode interrupts for the PF VSI, and should not be used
 * for the VF VSI.
 */
void ice_vsi_cfg_msix(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	u16 txq = 0, rxq = 0;
	int i, q;

	ice_for_each_q_vector(vsi, i) {
		struct ice_q_vector *q_vector = vsi->q_vectors[i];
		u16 reg_idx = q_vector->reg_idx;

		ice_cfg_itr(hw, q_vector);

		/* Both the Transmit Queue Interrupt Cause Control register
		 * and the Receive Queue Interrupt Cause Control register
		 * expect the MSIX_INDX field to be the vector index
		 * within the function space and not the absolute
		 * vector index across PF or across device.
		 * For SR-IOV VF VSIs queue vector index always starts
		 * with 1 since first vector index(0) is used for OICR
		 * in VF space. Since VMDq and other PF VSIs are within
		 * the PF function space, use the vector index that is
		 * tracked for this PF.
		 */
		for (q = 0; q < q_vector->num_ring_tx; q++) {
			ice_cfg_txq_interrupt(vsi, txq, reg_idx,
					      q_vector->tx.itr_idx);
			txq++;
		}

		for (q = 0; q < q_vector->num_ring_rx; q++) {
			ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
					      q_vector->rx.itr_idx);
			rxq++;
		}
	}
}

/**
 * ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings
 * @vsi: the VSI whose rings are to be enabled
 *
 * Returns 0 on success and a negative value on error
 */
int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi)
{
	return ice_vsi_ctrl_all_rx_rings(vsi, true);
}

/**
 * ice_vsi_stop_all_rx_rings - stop/disable all of a VSI's Rx rings
 * @vsi: the VSI whose rings are to be disabled
 *
 * Returns 0 on success and a negative value on error
 */
int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
{
	return ice_vsi_ctrl_all_rx_rings(vsi, false);
}

/**
 * ice_vsi_stop_tx_rings - Disable Tx rings
 * @vsi: the VSI being configured
 * @rst_src: reset source
 * @rel_vmvf_num: Relative ID of VF/VM
 * @rings: Tx ring array to be stopped
 * @count: number of Tx ring array elements
 */
static int
ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
		      u16 rel_vmvf_num, struct ice_tx_ring **rings, u16 count)
{
	u16 q_idx;

	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
		return -EINVAL;

	for (q_idx = 0; q_idx < count; q_idx++) {
		struct ice_txq_meta txq_meta = { };
		int status;

		if (!rings || !rings[q_idx])
			return -EINVAL;

		ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
		status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
					      rings[q_idx], &txq_meta);

		if (status)
			return status;
	}

	return 0;
}

/**
 * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
 * @vsi: the VSI being configured
 * @rst_src: reset source
 * @rel_vmvf_num: Relative ID of VF/VM
 */
int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
			  u16 rel_vmvf_num)
{
	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
}

/**
 * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
 * @vsi: the VSI being configured
 */
int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
{
	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
}

/**
 * ice_vsi_is_vlan_pruning_ena - check if VLAN pruning is enabled or not
 * @vsi: VSI to check whether or not VLAN pruning is enabled.
 *
 * returns true if Rx VLAN pruning is enabled and false otherwise.
 */
bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi)
{
	if (!vsi)
		return false;

	return (vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA);
}

static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
{
	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
		vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
		vsi->tc_cfg.numtc = 1;
		return;
	}

	/* set VSI TC information based on DCB config */
	ice_vsi_set_dcb_tc_cfg(vsi);
}

/**
 * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors
 * @vsi: VSI to set the q_vectors register index on
 */
static int
ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
{
	u16 i;

	if (!vsi || !vsi->q_vectors)
		return -EINVAL;

	ice_for_each_q_vector(vsi, i) {
		struct ice_q_vector *q_vector = vsi->q_vectors[i];

		if (!q_vector) {
			dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n",
				i, vsi->vsi_num);
			goto clear_reg_idx;
		}

		if (vsi->type == ICE_VSI_VF) {
			struct ice_vf *vf = vsi->vf;

			q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
		} else {
			q_vector->reg_idx =
				q_vector->v_idx + vsi->base_vector;
		}
	}

	return 0;

clear_reg_idx:
	ice_for_each_q_vector(vsi, i) {
		struct ice_q_vector *q_vector = vsi->q_vectors[i];

		if (q_vector)
			q_vector->reg_idx = 0;
	}

	return -EINVAL;
}
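
/* Example of the split above (numbers illustrative): for a VF VSI,
 * ice_calc_vf_reg_idx() is expected to map v_idx 0 to register index 1,
 * since vector 0 in VF space is reserved for the OICR; a PF VSI with
 * base_vector 64 maps v_idx 0..N directly to 64..64+N.
 */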

/**
 * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
 * @vsi: the VSI being configured
 * @tx: bool to determine Tx or Rx rule
 * @create: bool to determine create or remove Rule
 */
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
{
	int (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
			enum ice_sw_fwd_act_type act);
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	int status;

	dev = ice_pf_to_dev(pf);
	eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;

	if (tx) {
		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
				  ICE_DROP_PACKET);
	} else {
		if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
			status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
							  create);
		} else {
			status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
					  ICE_FWD_TO_VSI);
		}
	}

	if (status)
		dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
			create ? "adding" : "removing", tx ? "TX" : "RX",
			vsi->vsi_num, status);
}
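
/* Hedged usage sketch: a Tx drop rule keeps locally generated LLDP frames
 * off the wire, e.g. while the firmware LLDP agent owns the protocol:
 *
 *	ice_cfg_sw_lldp(vsi, true, true);	// create Tx drop rule
 *	ice_cfg_sw_lldp(vsi, true, false);	// remove it again
 */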
|
|
|
|
|
2020-11-21 08:39:27 +08:00
|
|
|
/**
 * ice_set_agg_vsi - sets up scheduler aggregator node and move VSI into it
 * @vsi: pointer to the VSI
 *
 * This function will allocate a new scheduler aggregator node if needed and
 * will move the specified VSI into it.
 */
static void ice_set_agg_vsi(struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct ice_agg_node *agg_node_iter = NULL;
	u32 agg_id = ICE_INVALID_AGG_NODE_ID;
	struct ice_agg_node *agg_node = NULL;
	int node_offset, max_agg_nodes = 0;
	struct ice_port_info *port_info;
	struct ice_pf *pf = vsi->back;
	u32 agg_node_id_start = 0;
	int status;

	/* create (as needed) scheduler aggregator node and move VSI into
	 * corresponding aggregator node
	 * - PF aggregator node contains VSIs of type _PF and _CTRL
	 * - VF aggregator nodes will contain VF VSIs
	 */
	port_info = pf->hw.port_info;
	if (!port_info)
		return;

	switch (vsi->type) {
	case ICE_VSI_CTRL:
	case ICE_VSI_CHNL:
	case ICE_VSI_LB:
	case ICE_VSI_PF:
	case ICE_VSI_SWITCHDEV_CTRL:
		max_agg_nodes = ICE_MAX_PF_AGG_NODES;
		agg_node_id_start = ICE_PF_AGG_NODE_ID_START;
		agg_node_iter = &pf->pf_agg_node[0];
		break;
	case ICE_VSI_VF:
		/* user can create 'n' VFs on a given PF, but the max children
		 * per aggregator node is 64. The following code handles
		 * aggregator(s) for VF VSIs: it either selects an agg_node
		 * that was already created, provided its num_vsis < 64, or
		 * selects the next available node, which will be created
		 */
		max_agg_nodes = ICE_MAX_VF_AGG_NODES;
		agg_node_id_start = ICE_VF_AGG_NODE_ID_START;
		agg_node_iter = &pf->vf_agg_node[0];
		break;
	default:
		/* other VSI type, handle later if needed */
		dev_dbg(dev, "unexpected VSI type %s\n",
			ice_vsi_type_str(vsi->type));
		return;
	}

	/* find the appropriate aggregator node */
	for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) {
		/* see if we can find space in previously created
		 * node if num_vsis < 64, otherwise skip
		 */
		if (agg_node_iter->num_vsis &&
		    agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
			agg_node_iter++;
			continue;
		}

		if (agg_node_iter->valid &&
		    agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) {
			agg_id = agg_node_iter->agg_id;
			agg_node = agg_node_iter;
			break;
		}

		/* find unclaimed agg_id */
		if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) {
			agg_id = node_offset + agg_node_id_start;
			agg_node = agg_node_iter;
			break;
		}
		/* move to next agg_node */
		agg_node_iter++;
	}

	if (!agg_node)
		return;

	/* if selected aggregator node was not created, create it */
	if (!agg_node->valid) {
		status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG,
				     (u8)vsi->tc_cfg.ena_tc);
		if (status) {
			dev_err(dev, "unable to create aggregator node with agg_id %u\n",
				agg_id);
			return;
		}
		/* aggregator node is created, store the needed info */
		agg_node->valid = true;
		agg_node->agg_id = agg_id;
	}

	/* move VSI to corresponding aggregator node */
	status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx,
				     (u8)vsi->tc_cfg.ena_tc);
	if (status) {
		dev_err(dev, "unable to move VSI idx %u into aggregator %u node",
			vsi->idx, agg_id);
		return;
	}

	/* keep active children count for aggregator node */
	agg_node->num_vsis++;

	/* cache the 'agg_id' in VSI, so that after reset - VSI will be moved
	 * to aggregator node
	 */
	vsi->agg_node = agg_node;
	dev_dbg(dev, "successfully moved VSI idx %u (tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n",
		vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id,
		vsi->agg_node->num_vsis);
}

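The selection loop in ice_set_agg_vsi() is the subtle part, so here is a self-contained sketch of just that policy, with simplified, hypothetical types and names (example_agg and example_pick_agg are not driver identifiers):

/* Illustration only: pick the first aggregator with spare capacity,
 * else claim the first unused ID starting at id_start.
 */
struct example_agg {
	u32 agg_id;
	u16 num_vsis;
	bool valid;
};

#define EXAMPLE_INVALID_AGG_ID		0xFFFFFFFF
#define EXAMPLE_MAX_VSIS_PER_AGG	64

static struct example_agg *
example_pick_agg(struct example_agg *nodes, int n, u32 id_start)
{
	int off;

	for (off = 0; off < n; off++) {
		struct example_agg *node = &nodes[off];

		/* node full: move on */
		if (node->num_vsis == EXAMPLE_MAX_VSIS_PER_AGG)
			continue;
		/* existing node with room */
		if (node->valid && node->agg_id != EXAMPLE_INVALID_AGG_ID)
			return node;
		/* unused slot: claim a fresh aggregator ID */
		if (node->agg_id == EXAMPLE_INVALID_AGG_ID) {
			node->agg_id = id_start + off;
			return node;
		}
	}
	return NULL;	/* caller gives up; VSI stays where it is */
}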
/**
 * ice_vsi_setup - Set up a VSI by a given type
 * @pf: board private structure
 * @pi: pointer to the port_info instance
 * @vsi_type: VSI type
 * @vf: pointer to VF to which this VSI connects. This field is used primarily
 *      for the ICE_VSI_VF type. Other VSI types should pass NULL.
 * @ch: ptr to channel
 *
 * This allocates the sw VSI structure and its queue resources.
 *
 * Returns pointer to the successfully allocated and configured VSI sw struct on
 * success, NULL on failure.
 */
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
	      enum ice_vsi_type vsi_type, struct ice_vf *vf,
	      struct ice_channel *ch)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_vsi *vsi;
	int ret, i;

	if (vsi_type == ICE_VSI_CHNL)
		vsi = ice_vsi_alloc(pf, vsi_type, ch, NULL);
	else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
		vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf);
	else
		vsi = ice_vsi_alloc(pf, vsi_type, NULL, NULL);

	if (!vsi) {
		dev_err(dev, "could not allocate VSI\n");
		return NULL;
	}

	vsi->port_info = pi;
	vsi->vsw = pf->first_sw;
	if (vsi->type == ICE_VSI_PF)
		vsi->ethtype = ETH_P_PAUSE;

	ice_alloc_fd_res(vsi);

	if (vsi_type != ICE_VSI_CHNL) {
		if (ice_vsi_get_qs(vsi)) {
			dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
				vsi->idx);
			goto unroll_vsi_alloc;
		}
	}

	/* set RSS capabilities */
	ice_vsi_set_rss_params(vsi);

	/* set TC configuration */
	ice_vsi_set_tc_cfg(vsi);

	/* create the VSI */
	ret = ice_vsi_init(vsi, true);
	if (ret)
		goto unroll_get_qs;

	ice_vsi_init_vlan_ops(vsi);

	switch (vsi->type) {
	case ICE_VSI_CTRL:
	case ICE_VSI_SWITCHDEV_CTRL:
	case ICE_VSI_PF:
		ret = ice_vsi_alloc_q_vectors(vsi);
		if (ret)
			goto unroll_vsi_init;

		ret = ice_vsi_setup_vector_base(vsi);
		if (ret)
			goto unroll_alloc_q_vector;

		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
		if (ret)
			goto unroll_vector_base;

		ret = ice_vsi_alloc_rings(vsi);
		if (ret)
			goto unroll_vector_base;

		ice_vsi_map_rings_to_vectors(vsi);

		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
		if (vsi->type != ICE_VSI_CTRL)
			/* Do not exit if configuring RSS had an issue, at
			 * least receive traffic on first queue. Hence no
			 * need to capture return value
			 */
			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
				ice_vsi_cfg_rss_lut_key(vsi);
				ice_vsi_set_rss_flow_fld(vsi);
			}
		ice_init_arfs(vsi);
		break;
	case ICE_VSI_CHNL:
		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
			ice_vsi_cfg_rss_lut_key(vsi);
			ice_vsi_set_rss_flow_fld(vsi);
		}
		break;
	case ICE_VSI_VF:
		/* VF driver will take care of creating netdev for this type and
		 * map queues to vectors through Virtchnl, PF driver only
		 * creates a VSI and corresponding structures for bookkeeping
		 * purpose
		 */
		ret = ice_vsi_alloc_q_vectors(vsi);
		if (ret)
			goto unroll_vsi_init;

		ret = ice_vsi_alloc_rings(vsi);
		if (ret)
			goto unroll_alloc_q_vector;

		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
		if (ret)
			goto unroll_vector_base;

		/* Do not exit if configuring RSS had an issue, at least
		 * receive traffic on first queue. Hence no need to capture
		 * return value
		 */
		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
			ice_vsi_cfg_rss_lut_key(vsi);
			ice_vsi_set_vf_rss_flow_fld(vsi);
		}
		break;
	case ICE_VSI_LB:
		ret = ice_vsi_alloc_rings(vsi);
		if (ret)
			goto unroll_vsi_init;
		break;
	default:
		/* clean up the resources and exit */
		goto unroll_vsi_init;
	}

	/* configure VSI nodes based on number of queues and TC's */
	ice_for_each_traffic_class(i) {
		if (!(vsi->tc_cfg.ena_tc & BIT(i)))
			continue;

		if (vsi->type == ICE_VSI_CHNL) {
			if (!vsi->alloc_txq && vsi->num_txq)
				max_txqs[i] = vsi->num_txq;
			else
				max_txqs[i] = pf->num_lan_tx;
		} else {
			max_txqs[i] = vsi->alloc_txq;
		}
	}

	dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc);
	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
			      max_txqs);
	if (ret) {
		dev_err(dev, "VSI %d failed lan queue config, error %d\n",
			vsi->vsi_num, ret);
		goto unroll_clear_rings;
	}

	/* Add switch rule to drop all Tx Flow Control Frames, of lookup
	 * type ETHERTYPE from VSIs, and restrict malicious VF from sending
	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
	 * The rule is added once for PF VSI in order to create appropriate
	 * recipe, since VSI/VSI list is ignored with drop action...
	 * Also add rules to handle LLDP Tx packets. Tx LLDP packets need to
	 * be dropped so that VFs cannot send LLDP packets to reconfig DCB
	 * settings in the HW.
	 */
	if (!ice_is_safe_mode(pf))
		if (vsi->type == ICE_VSI_PF) {
			ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
					 ICE_DROP_PACKET);
			ice_cfg_sw_lldp(vsi, true, true);
		}

	if (!vsi->agg_node)
		ice_set_agg_vsi(vsi);
	return vsi;

unroll_clear_rings:
	ice_vsi_clear_rings(vsi);
unroll_vector_base:
	/* reclaim SW interrupts back to the common pool */
	ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
	pf->num_avail_sw_msix += vsi->num_q_vectors;
unroll_alloc_q_vector:
	ice_vsi_free_q_vectors(vsi);
unroll_vsi_init:
	ice_vsi_delete(vsi);
unroll_get_qs:
	ice_vsi_put_qs(vsi);
unroll_vsi_alloc:
	if (vsi_type == ICE_VSI_VF)
		ice_enable_lag(pf->lag);
	ice_vsi_clear(vsi);

	return NULL;
}

/**
 * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
 * @vsi: the VSI being cleaned up
 */
static void ice_vsi_release_msix(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	u32 txq = 0;
	u32 rxq = 0;
	int i, q;

	ice_for_each_q_vector(vsi, i) {
		struct ice_q_vector *q_vector = vsi->q_vectors[i];

		ice_write_intrl(q_vector, 0);
		for (q = 0; q < q_vector->num_ring_tx; q++) {
			ice_write_itr(&q_vector->tx, 0);
			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
			if (ice_is_xdp_ena_vsi(vsi)) {
				u32 xdp_txq = txq + vsi->num_xdp_txq;

				wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
			}
			txq++;
		}

		for (q = 0; q < q_vector->num_ring_rx; q++) {
			ice_write_itr(&q_vector->rx, 0);
			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
			rxq++;
		}
	}

	ice_flush(hw);
}

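One detail worth calling out from ice_vsi_release_msix(): the XDP rings are looked up at txq + vsi->num_xdp_txq, which assumes the XDP Tx queues sit directly after the LAN Tx queues in txq_map. A worked example, under that layout assumption:

/* Illustration only: with 4 LAN Tx queues and 4 XDP Tx queues,
 * txq_map = { lan0, lan1, lan2, lan3, xdp0, xdp1, xdp2, xdp3 },
 * so LAN ring 1's XDP counterpart is txq_map[1 + num_xdp_txq] == xdp1
 * (num_xdp_txq matching the LAN queue count when XDP is enabled).
 */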
/**
 * ice_vsi_free_irq - Free the IRQ association with the OS
 * @vsi: the VSI being configured
 */
void ice_vsi_free_irq(struct ice_vsi *vsi)
{
	struct ice_pf *pf = vsi->back;
	int base = vsi->base_vector;
	int i;

	if (!vsi->q_vectors || !vsi->irqs_ready)
		return;

	ice_vsi_release_msix(vsi);
	if (vsi->type == ICE_VSI_VF)
		return;

	vsi->irqs_ready = false;
	ice_for_each_q_vector(vsi, i) {
		u16 vector = i + base;
		int irq_num;

		irq_num = pf->msix_entries[vector].vector;

		/* free only the irqs that were actually requested */
		if (!vsi->q_vectors[i] ||
		    !(vsi->q_vectors[i]->num_ring_tx ||
		      vsi->q_vectors[i]->num_ring_rx))
			continue;

		/* clear the affinity notifier in the IRQ descriptor */
		irq_set_affinity_notifier(irq_num, NULL);

		/* clear the affinity_mask in the IRQ descriptor */
		irq_set_affinity_hint(irq_num, NULL);
		synchronize_irq(irq_num);
		devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
	}
}

/**
 * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
 * @vsi: the VSI having resources freed
 */
void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
{
	int i;

	if (!vsi->tx_rings)
		return;

	ice_for_each_txq(vsi, i)
		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
			ice_free_tx_ring(vsi->tx_rings[i]);
}

/**
 * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
 * @vsi: the VSI having resources freed
 */
void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
{
	int i;

	if (!vsi->rx_rings)
		return;

	ice_for_each_rxq(vsi, i)
		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
			ice_free_rx_ring(vsi->rx_rings[i]);
}

/**
 * ice_vsi_close - Shut down a VSI
 * @vsi: the VSI being shut down
 */
void ice_vsi_close(struct ice_vsi *vsi)
{
	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
		ice_down(vsi);

	ice_vsi_free_irq(vsi);
	ice_vsi_free_tx_rings(vsi);
	ice_vsi_free_rx_rings(vsi);
}

/**
 * ice_ena_vsi - resume a VSI
 * @vsi: the VSI being resumed
 * @locked: is the rtnl_lock already held
 */
int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
{
	int err = 0;

	if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state))
		return 0;

	clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);

	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
		if (netif_running(vsi->netdev)) {
			if (!locked)
				rtnl_lock();

			err = ice_open_internal(vsi->netdev);

			if (!locked)
				rtnl_unlock();
		}
	} else if (vsi->type == ICE_VSI_CTRL) {
		err = ice_vsi_open_ctrl(vsi);
	}

	return err;
}

/**
 * ice_dis_vsi - pause a VSI
 * @vsi: the VSI being paused
 * @locked: is the rtnl_lock already held
 */
void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
{
	if (test_bit(ICE_VSI_DOWN, vsi->state))
		return;

	set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);

	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
		if (netif_running(vsi->netdev)) {
			if (!locked)
				rtnl_lock();

			ice_vsi_close(vsi);

			if (!locked)
				rtnl_unlock();
		} else {
			ice_vsi_close(vsi);
		}
	} else if (vsi->type == ICE_VSI_CTRL ||
		   vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
		ice_vsi_close(vsi);
	}
}

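Taken together, ice_dis_vsi() and ice_ena_vsi() form a pause/resume pair keyed off ICE_VSI_NEEDS_RESTART. A hedged sketch of the intended calling pattern follows; the wrapper is hypothetical, while the driver's reset path applies the same sequence across all VSIs.

/* Illustration only: pause a VSI, reconfigure, then resume it.
 * ice_ena_vsi() is a no-op unless ice_dis_vsi() flagged the VSI
 * for restart first.
 */
static int example_reconfigure_vsi(struct ice_vsi *vsi)
{
	ice_dis_vsi(vsi, false);	/* sets ICE_VSI_NEEDS_RESTART, closes VSI */

	/* ... change queue/interrupt configuration here ... */

	return ice_ena_vsi(vsi, false);	/* reopens only flagged VSIs */
}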
/**
 * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
 * @vsi: the VSI being un-configured
 */
void ice_vsi_dis_irq(struct ice_vsi *vsi)
{
	int base = vsi->base_vector;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	u32 val;
	int i;

	/* disable interrupt causation from each queue */
	if (vsi->tx_rings) {
		ice_for_each_txq(vsi, i) {
			if (vsi->tx_rings[i]) {
				u16 reg;

				reg = vsi->tx_rings[i]->reg_idx;
				val = rd32(hw, QINT_TQCTL(reg));
				val &= ~QINT_TQCTL_CAUSE_ENA_M;
				wr32(hw, QINT_TQCTL(reg), val);
			}
		}
	}

	if (vsi->rx_rings) {
		ice_for_each_rxq(vsi, i) {
			if (vsi->rx_rings[i]) {
				u16 reg;

				reg = vsi->rx_rings[i]->reg_idx;
				val = rd32(hw, QINT_RQCTL(reg));
				val &= ~QINT_RQCTL_CAUSE_ENA_M;
				wr32(hw, QINT_RQCTL(reg), val);
			}
		}
	}

	/* disable each interrupt */
	ice_for_each_q_vector(vsi, i) {
		if (!vsi->q_vectors[i])
			continue;
		wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
	}

	ice_flush(hw);

	/* don't call synchronize_irq() for VFs from the host */
	if (vsi->type == ICE_VSI_VF)
		return;

	ice_for_each_q_vector(vsi, i)
		synchronize_irq(pf->msix_entries[i + base].vector);
}

/**
 * ice_napi_del - Remove NAPI handler for the VSI
 * @vsi: VSI for which NAPI handler is to be removed
 */
void ice_napi_del(struct ice_vsi *vsi)
{
	int v_idx;

	if (!vsi->netdev)
		return;

	ice_for_each_q_vector(vsi, v_idx)
		netif_napi_del(&vsi->q_vectors[v_idx]->napi);
}

/**
 * ice_free_vf_ctrl_res - Free the VF control VSI resource
 * @pf: pointer to PF structure
 * @vsi: the VSI to free resources for
 *
 * Check if the VF control VSI resource is still in use. If no VF is using it
 * any more, release the VSI resource. Otherwise, leave it to be cleaned up
 * once no other VF uses it.
 */
static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi)
{
	struct ice_vf *vf;
	unsigned int bkt;

	rcu_read_lock();
	ice_for_each_vf_rcu(pf, bkt, vf) {
		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
			rcu_read_unlock();
			return;
2022-02-17 05:37:38 +08:00
|
|
|
}
|
2022-02-17 05:37:29 +08:00
|
|
|
}
|
	rcu_read_unlock();

	/* No other VFs left that have control VSI. It is now safe to reclaim
	 * SW interrupts back to the common pool.
	 */
	ice_free_res(pf->irq_tracker, vsi->base_vector,
		     ICE_RES_VF_CTRL_VEC_ID);
	pf->num_avail_sw_msix += vsi->num_q_vectors;
}

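ice_free_vf_ctrl_res() leans on the RCU-protected VF hash table: readers take rcu_read_lock() and walk the table with ice_for_each_vf_rcu(), while writers hold the table mutex. A generic, self-contained sketch of that read-side pattern follows; the example_* names are hypothetical, not driver API.

/* Illustration only: RCU read-side lookup in a kernel hashtable. */
#include <linux/hashtable.h>
#include <linux/rcupdate.h>

struct example_vf {
	u16 vf_id;
	struct hlist_node entry;
};

static DEFINE_HASHTABLE(example_vf_table, 4);

static bool example_vf_in_use(u16 vf_id)
{
	struct example_vf *vf;
	bool found = false;

	rcu_read_lock();
	hash_for_each_possible_rcu(example_vf_table, vf, entry, vf_id) {
		if (vf->vf_id == vf_id) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}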
/**
 * ice_vsi_release - Delete a VSI and free its resources
 * @vsi: the VSI being removed
 *
 * Returns 0 on success or < 0 on error
 */
int ice_vsi_release(struct ice_vsi *vsi)
{
	struct ice_pf *pf;
	int err;

	if (!vsi->back)
		return -ENODEV;
	pf = vsi->back;

	/* do not unregister while driver is in the reset recovery pending
	 * state. Since reset/rebuild happens through PF service task workqueue,
	 * it's not a good idea to unregister netdev that is associated to the
	 * PF that is running the work queue items currently. This is done to
	 * avoid check_flush_dependency() warning on this wq
	 */
	if (vsi->netdev && !ice_is_reset_in_progress(pf->state) &&
	    (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) {
		unregister_netdev(vsi->netdev);
		clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
	}

	if (vsi->type == ICE_VSI_PF)
		ice_devlink_destroy_pf_port(pf);

	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
		ice_rss_clean(vsi);

	/* Disable VSI and free resources */
	if (vsi->type != ICE_VSI_LB)
		ice_vsi_dis_irq(vsi);
	ice_vsi_close(vsi);

	/* SR-IOV determines needed MSIX resources all at once instead of per
	 * VSI since when VFs are spawned we know how many VFs there are and how
	 * many interrupts each VF needs. SR-IOV MSIX resources are also
	 * cleared in the same manner.
	 */
	if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
		ice_free_vf_ctrl_res(pf, vsi);
	} else if (vsi->type != ICE_VSI_VF) {
		/* reclaim SW interrupts back to the common pool */
|
|
|
ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
|
2018-09-20 08:42:56 +08:00
|
|
|
pf->num_avail_sw_msix += vsi->num_q_vectors;
|
|
|
|
}
|
2018-09-20 08:23:09 +08:00
|
|
|
|
2019-09-09 21:47:46 +08:00
|
|
|
if (!ice_is_safe_mode(pf)) {
|
|
|
|
if (vsi->type == ICE_VSI_PF) {
|
2020-05-08 08:41:08 +08:00
|
|
|
ice_fltr_remove_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
|
|
|
|
ICE_DROP_PACKET);
|
2019-09-09 21:47:46 +08:00
|
|
|
ice_cfg_sw_lldp(vsi, true, false);
|
|
|
|
/* The Rx rule will only exist to remove if the LLDP FW
|
|
|
|
* engine is currently stopped
|
|
|
|
*/
|
|
|
|
if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
|
|
|
|
ice_cfg_sw_lldp(vsi, false, false);
|
|
|
|
}
|
2019-04-17 01:34:55 +08:00
|
|
|
}
|
2019-04-17 01:21:24 +08:00
|
|
|
|
2020-05-08 08:41:08 +08:00
|
|
|
ice_fltr_remove_all(vsi);
|
2018-10-27 01:41:02 +08:00
|
|
|
ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
|
2021-06-29 01:53:45 +08:00
|
|
|
err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
|
|
|
|
if (err)
|
|
|
|
dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
|
|
|
|
vsi->vsi_num, err);
|
2018-09-20 08:23:09 +08:00
|
|
|
ice_vsi_delete(vsi);
|
|
|
|
ice_vsi_free_q_vectors(vsi);
|
2019-04-17 01:24:37 +08:00
|
|
|
|
2021-03-03 02:15:41 +08:00
|
|
|
if (vsi->netdev) {
|
|
|
|
if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) {
|
|
|
|
unregister_netdev(vsi->netdev);
|
|
|
|
clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
|
|
|
|
}
|
|
|
|
if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) {
|
|
|
|
free_netdev(vsi->netdev);
|
|
|
|
vsi->netdev = NULL;
|
|
|
|
clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
|
|
|
|
}
|
2019-04-17 01:24:37 +08:00
|
|
|
}
|
|
|
|
|
2020-11-21 08:39:27 +08:00
|
|
|
if (vsi->type == ICE_VSI_VF &&
|
|
|
|
vsi->agg_node && vsi->agg_node->valid)
|
|
|
|
vsi->agg_node->num_vsis--;
|
2018-09-20 08:23:09 +08:00
|
|
|
ice_vsi_clear_rings(vsi);
|
|
|
|
|
|
|
|
ice_vsi_put_qs(vsi);
|
|
|
|
|
|
|
|

	/* retain SW VSI data structure since it is needed to unregister and
	 * free VSI netdev when PF is not in reset recovery pending state,
	 * for ex: during rmmod.
	 */
	if (!ice_is_reset_in_progress(pf->state))
		ice_vsi_clear(vsi);

	return 0;
}

/**
 * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
 * @vsi: VSI connected with q_vectors
 * @coalesce: array of struct with stored coalesce
 *
 * Returns array size.
 */
static int
ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
			     struct ice_coalesce_stored *coalesce)
{
	int i;

	ice_for_each_q_vector(vsi, i) {
		struct ice_q_vector *q_vector = vsi->q_vectors[i];

		coalesce[i].itr_tx = q_vector->tx.itr_setting;
		coalesce[i].itr_rx = q_vector->rx.itr_setting;
		coalesce[i].intrl = q_vector->intrl;

		if (i < vsi->num_txq)
			coalesce[i].tx_valid = true;
		if (i < vsi->num_rxq)
			coalesce[i].rx_valid = true;
	}

	return vsi->num_q_vectors;
}

/**
 * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays
 * @vsi: VSI connected with q_vectors
 * @coalesce: pointer to array of struct with stored coalesce
 * @size: size of coalesce array
 *
 * Before this function, ice_vsi_rebuild_get_coalesce should be called to save
 * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce
 * to default value.
 */
static void
ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
			     struct ice_coalesce_stored *coalesce, int size)
{
	struct ice_ring_container *rc;
	int i;

	if ((size && !coalesce) || !vsi)
		return;

	/* There are a couple of cases that have to be handled here:
	 * 1. The case where the number of queue vectors stays the same, but
	 *    the number of Tx or Rx rings changes (the first for loop)
	 * 2. The case where the number of queue vectors increased (the
	 *    second for loop)
	 */
	for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
		/* There are 2 cases to handle here and they are the same for
		 * both Tx and Rx:
		 *   if the entry was valid previously (coalesce[i].[tr]x_valid)
		 *   and the loop variable is less than the number of rings
		 *   allocated, then write the previous values
		 *
		 *   if the entry was not valid previously, but the number of
		 *   rings is less than are allocated (this means the number of
		 *   rings increased from previously), then write out the
		 *   values in the first element
		 *
		 *   Also, always write the ITR, even if in ITR_IS_DYNAMIC
		 *   as there is no harm because the dynamic algorithm
		 *   will just overwrite.
		 */
		if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
			rc = &vsi->q_vectors[i]->rx;
			rc->itr_setting = coalesce[i].itr_rx;
			ice_write_itr(rc, rc->itr_setting);
		} else if (i < vsi->alloc_rxq) {
			rc = &vsi->q_vectors[i]->rx;
			rc->itr_setting = coalesce[0].itr_rx;
			ice_write_itr(rc, rc->itr_setting);
		}

		if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
			rc = &vsi->q_vectors[i]->tx;
			rc->itr_setting = coalesce[i].itr_tx;
			ice_write_itr(rc, rc->itr_setting);
		} else if (i < vsi->alloc_txq) {
			rc = &vsi->q_vectors[i]->tx;
			rc->itr_setting = coalesce[0].itr_tx;
			ice_write_itr(rc, rc->itr_setting);
		}

		vsi->q_vectors[i]->intrl = coalesce[i].intrl;
		ice_set_q_vector_intrl(vsi->q_vectors[i]);
	}

	/* the number of queue vectors increased so write whatever is in
	 * the first element
	 */
	for (; i < vsi->num_q_vectors; i++) {
		/* transmit */
		rc = &vsi->q_vectors[i]->tx;
		rc->itr_setting = coalesce[0].itr_tx;
		ice_write_itr(rc, rc->itr_setting);

		/* receive */
		rc = &vsi->q_vectors[i]->rx;
		rc->itr_setting = coalesce[0].itr_rx;
		ice_write_itr(rc, rc->itr_setting);

		vsi->q_vectors[i]->intrl = coalesce[0].intrl;
		ice_set_q_vector_intrl(vsi->q_vectors[i]);
	}
}
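
/* How the two helpers above pair up around a queue reconfiguration (a
 * sketch; error handling elided). ice_vsi_rebuild() below owns the
 * allocation exactly this way:
 *
 *	struct ice_coalesce_stored *coalesce;
 *	int prev_num_q_vectors;
 *
 *	coalesce = kcalloc(vsi->num_q_vectors, sizeof(*coalesce), GFP_KERNEL);
 *	if (!coalesce)
 *		return -ENOMEM;
 *	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
 *	... tear down and re-create q_vectors and rings ...
 *	ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
 *	kfree(coalesce);
 */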

/**
 * ice_vsi_rebuild - Rebuild VSI after reset
 * @vsi: VSI to be rebuilt
 * @init_vsi: is this an initialization or a reconfigure of the VSI
 *
 * Returns 0 on success and negative value on failure
 */
int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct ice_coalesce_stored *coalesce;
	int prev_num_q_vectors = 0;
	enum ice_vsi_type vtype;
	struct ice_pf *pf;
	int ret, i;

	if (!vsi)
		return -EINVAL;

	pf = vsi->back;
	vtype = vsi->type;
	if (WARN_ON(vtype == ICE_VSI_VF && !vsi->vf))
		return -EINVAL;

	ice_vsi_init_vlan_ops(vsi);

	coalesce = kcalloc(vsi->num_q_vectors,
			   sizeof(struct ice_coalesce_stored), GFP_KERNEL);
	if (!coalesce)
		return -ENOMEM;

	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);

	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
	ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
	if (ret)
		dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
			vsi->vsi_num, ret);
	ice_vsi_free_q_vectors(vsi);

	/* SR-IOV determines needed MSIX resources all at once instead of per
	 * VSI since when VFs are spawned we know how many VFs there are and how
	 * many interrupts each VF needs. SR-IOV MSIX resources are also
	 * cleared in the same manner.
	 */
	if (vtype != ICE_VSI_VF) {
		/* reclaim SW interrupts back to the common pool */
		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
		pf->num_avail_sw_msix += vsi->num_q_vectors;
		vsi->base_vector = 0;
	}

	if (ice_is_xdp_ena_vsi(vsi))
		/* return value check can be skipped here, it always returns
		 * 0 if reset is in progress
		 */
		ice_destroy_xdp_rings(vsi);
	ice_vsi_put_qs(vsi);
	ice_vsi_clear_rings(vsi);
	ice_vsi_free_arrays(vsi);
	if (vtype == ICE_VSI_VF)
		ice_vsi_set_num_qs(vsi, vsi->vf);
	else
		ice_vsi_set_num_qs(vsi, NULL);

	ret = ice_vsi_alloc_arrays(vsi);
	if (ret < 0)
		goto err_vsi;

	ice_vsi_get_qs(vsi);

	ice_alloc_fd_res(vsi);
	ice_vsi_set_tc_cfg(vsi);

	/* Initialize VSI struct elements and create VSI in FW */
	ret = ice_vsi_init(vsi, init_vsi);
	if (ret < 0)
		goto err_vsi;

	switch (vtype) {
	case ICE_VSI_CTRL:
	case ICE_VSI_SWITCHDEV_CTRL:
	case ICE_VSI_PF:
		ret = ice_vsi_alloc_q_vectors(vsi);
		if (ret)
			goto err_rings;

		ret = ice_vsi_setup_vector_base(vsi);
		if (ret)
			goto err_vectors;

		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
		if (ret)
			goto err_vectors;

		ret = ice_vsi_alloc_rings(vsi);
		if (ret)
			goto err_vectors;

		ice_vsi_map_rings_to_vectors(vsi);
		if (ice_is_xdp_ena_vsi(vsi)) {
			ret = ice_vsi_determine_xdp_res(vsi);
			if (ret)
				goto err_vectors;
			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
			if (ret)
				goto err_vectors;
		}
		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
		if (vtype != ICE_VSI_CTRL)
			/* Do not exit if configuring RSS had an issue, at
			 * least receive traffic on first queue. Hence no
			 * need to capture return value
			 */
			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
				ice_vsi_cfg_rss_lut_key(vsi);
		break;
	case ICE_VSI_VF:
		ret = ice_vsi_alloc_q_vectors(vsi);
		if (ret)
			goto err_rings;

		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
		if (ret)
			goto err_vectors;

		ret = ice_vsi_alloc_rings(vsi);
		if (ret)
			goto err_vectors;

		break;
	case ICE_VSI_CHNL:
		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
			ice_vsi_cfg_rss_lut_key(vsi);
			ice_vsi_set_rss_flow_fld(vsi);
		}
		break;
	default:
		break;
	}

	/* configure VSI nodes based on number of queues and TC's */
	for (i = 0; i < vsi->tc_cfg.numtc; i++) {
		/* ADQ creates VSIs for each TC/Channel but doesn't allocate
		 * queues; instead it reconfigures the PF queues as per the
		 * TC command, so max_txqs should point to the PF Tx queues.
		 */
		if (vtype == ICE_VSI_CHNL)
			max_txqs[i] = pf->num_lan_tx;
		else
			max_txqs[i] = vsi->alloc_txq;

		if (ice_is_xdp_ena_vsi(vsi))
			max_txqs[i] += vsi->num_xdp_txq;
	}

	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
		/* If MQPRIO is set, means channel code path, hence for main
		 * VSI's, use TC as 1
		 */
		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
	else
		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
				      vsi->tc_cfg.ena_tc, max_txqs);

	if (ret) {
		dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n",
			vsi->vsi_num, ret);
		if (init_vsi) {
			ret = -EIO;
			goto err_vectors;
		} else {
			return ice_schedule_reset(pf, ICE_RESET_PFR);
		}
	}
	ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
	kfree(coalesce);

	return 0;

err_vectors:
	ice_vsi_free_q_vectors(vsi);
err_rings:
	if (vsi->netdev) {
		vsi->current_netdev_flags = 0;
		unregister_netdev(vsi->netdev);
		free_netdev(vsi->netdev);
		vsi->netdev = NULL;
	}
err_vsi:
	ice_vsi_clear(vsi);
	set_bit(ICE_RESET_FAILED, pf->state);
	kfree(coalesce);
	return ret;
}
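
/* Caller sketch for ice_vsi_rebuild() (hypothetical call sites; the reset
 * rebuild path passes init_vsi = true to re-create the VSI in FW, while a
 * runtime queue-count change passes false to only reconfigure it):
 *
 *	err = ice_vsi_rebuild(vsi, true);	// after a device reset
 *	err = ice_vsi_rebuild(vsi, false);	// e.g. changing queue counts
 */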

/**
 * ice_is_reset_in_progress - check for a reset in progress
 * @state: PF state field
 */
bool ice_is_reset_in_progress(unsigned long *state)
{
	return test_bit(ICE_RESET_OICR_RECV, state) ||
	       test_bit(ICE_PFR_REQ, state) ||
	       test_bit(ICE_CORER_REQ, state) ||
	       test_bit(ICE_GLOBR_REQ, state);
}

/**
 * ice_wait_for_reset - Wait for driver to finish reset and rebuild
 * @pf: pointer to the PF structure
 * @timeout: length of time to wait, in jiffies
 *
 * Wait (sleep) for a short time until the driver finishes cleaning up from
 * a device reset. The caller must be able to sleep. Use this to delay
 * operations that could fail while the driver is cleaning up after a device
 * reset.
 *
 * Returns 0 on success, -EBUSY if the reset is not finished within the
 * timeout, and -ERESTARTSYS if the thread was interrupted.
 */
int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout)
{
	long ret;

	ret = wait_event_interruptible_timeout(pf->reset_wait_queue,
					       !ice_is_reset_in_progress(pf->state),
					       timeout);
	if (ret < 0)
		return ret;
	else if (!ret)
		return -EBUSY;
	else
		return 0;
}
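
/* Minimal caller sketch (5 s is an arbitrary illustrative timeout):
 *
 *	if (ice_wait_for_reset(pf, msecs_to_jiffies(5000)))
 *		dev_dbg(ice_pf_to_dev(pf), "reset/rebuild still in progress\n");
 */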

/**
 * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
 * @vsi: VSI being configured
 * @ctx: the context buffer returned from AQ VSI update command
 */
static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
{
	vsi->info.mapping_flags = ctx->info.mapping_flags;
	memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping,
	       sizeof(vsi->info.q_mapping));
	memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping,
	       sizeof(vsi->info.tc_mapping));
}

/**
 * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
 * @vsi: the VSI being configured
 * @ena_tc: TC map to be enabled
 */
void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
{
	struct net_device *netdev = vsi->netdev;
	struct ice_pf *pf = vsi->back;
	int numtc = vsi->tc_cfg.numtc;
	struct ice_dcbx_cfg *dcbcfg;
	u8 netdev_tc;
	int i;

	if (!netdev)
		return;

	/* CHNL VSI doesn't have its own netdev, hence, no netdev_tc */
	if (vsi->type == ICE_VSI_CHNL)
		return;

	if (!ena_tc) {
		netdev_reset_tc(netdev);
		return;
	}

	if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf))
		numtc = vsi->all_numtc;

	if (netdev_set_num_tc(netdev, numtc))
		return;

	dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;

	ice_for_each_traffic_class(i)
		if (vsi->tc_cfg.ena_tc & BIT(i))
			netdev_set_tc_queue(netdev,
					    vsi->tc_cfg.tc_info[i].netdev_tc,
					    vsi->tc_cfg.tc_info[i].qcount_tx,
					    vsi->tc_cfg.tc_info[i].qoffset);
	/* setup TC queue map for CHNL TCs */
	ice_for_each_chnl_tc(i) {
		if (!(vsi->all_enatc & BIT(i)))
			break;
		if (!vsi->mqprio_qopt.qopt.count[i])
			break;
		netdev_set_tc_queue(netdev, i,
				    vsi->mqprio_qopt.qopt.count[i],
				    vsi->mqprio_qopt.qopt.offset[i]);
	}

	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
		return;

	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
		u8 ets_tc = dcbcfg->etscfg.prio_table[i];

		/* Get the mapped netdev TC# for the UP */
		netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
		netdev_set_prio_tc_map(netdev, i, netdev_tc);
	}
}

/**
 * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config
 * @vsi: the VSI being configured
 * @ctxt: VSI context structure
 * @ena_tc: number of traffic classes to enable
 *
 * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
 */
static void
ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
			   u8 ena_tc)
{
	u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap;
	u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0];
	int tc0_qcount = vsi->mqprio_qopt.qopt.count[0];
	u8 netdev_tc = 0;
	int i;

	vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1;

	pow = order_base_2(tc0_qcount);
	qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
		ICE_AQ_VSI_TC_Q_OFFSET_M) |
		((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M);

	ice_for_each_traffic_class(i) {
		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
			/* TC is not enabled */
			vsi->tc_cfg.tc_info[i].qoffset = 0;
			vsi->tc_cfg.tc_info[i].qcount_rx = 1;
			vsi->tc_cfg.tc_info[i].qcount_tx = 1;
			vsi->tc_cfg.tc_info[i].netdev_tc = 0;
			ctxt->info.tc_mapping[i] = 0;
			continue;
		}

		offset = vsi->mqprio_qopt.qopt.offset[i];
		qcount_rx = vsi->mqprio_qopt.qopt.count[i];
		qcount_tx = vsi->mqprio_qopt.qopt.count[i];
		vsi->tc_cfg.tc_info[i].qoffset = offset;
		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
		vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx;
		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
	}

	if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) {
		ice_for_each_chnl_tc(i) {
			if (!(vsi->all_enatc & BIT(i)))
				continue;
			offset = vsi->mqprio_qopt.qopt.offset[i];
			qcount_rx = vsi->mqprio_qopt.qopt.count[i];
			qcount_tx = vsi->mqprio_qopt.qopt.count[i];
		}
	}

	/* Set actual Tx/Rx queue pairs */
	vsi->num_txq = offset + qcount_tx;
	vsi->num_rxq = offset + qcount_rx;

	/* Setup queue TC[0].qmap for given VSI context */
	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
	ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount);

	/* Find queue count available for channel VSIs and starting offset
	 * for channel VSIs
	 */
	if (tc0_qcount && tc0_qcount < vsi->num_rxq) {
		vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount;
		vsi->next_base_q = tc0_qcount;
	}
	dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq);
	dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq);
	dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
		vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
}

/**
 * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
 * @vsi: VSI to be configured
 * @ena_tc: TC bitmap
 *
 * VSI queues expected to be quiesced before calling this function
 */
int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct ice_pf *pf = vsi->back;
	struct ice_vsi_ctx *ctx;
	struct device *dev;
	int i, ret = 0;
	u8 num_tc = 0;

	dev = ice_pf_to_dev(pf);
	if (vsi->tc_cfg.ena_tc == ena_tc &&
	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
		return ret;

	ice_for_each_traffic_class(i) {
		/* build bitmap of enabled TCs */
		if (ena_tc & BIT(i))
			num_tc++;
		/* populate max_txqs per TC */
		max_txqs[i] = vsi->alloc_txq;
		/* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are
		 * zero for CHNL VSI, hence use num_txq instead as max_txqs
		 */
		if (vsi->type == ICE_VSI_CHNL &&
		    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
			max_txqs[i] = vsi->num_txq;
	}

	vsi->tc_cfg.ena_tc = ena_tc;
	vsi->tc_cfg.numtc = num_tc;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->vf_num = 0;
	ctx->info = vsi->info;

	if (vsi->type == ICE_VSI_PF &&
	    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
		ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
	else
		ice_vsi_setup_q_map(vsi, ctx);

	/* must indicate which section of VSI context is being modified */
	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
	ret = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
	if (ret) {
		dev_info(dev, "Failed VSI Update\n");
		goto out;
	}

	if (vsi->type == ICE_VSI_PF &&
	    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
	else
		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
				      vsi->tc_cfg.ena_tc, max_txqs);

	if (ret) {
		dev_err(dev, "VSI %d failed TC config, error %d\n",
			vsi->vsi_num, ret);
		goto out;
	}
	ice_vsi_update_q_map(vsi, ctx);
	vsi->info.valid_sections = 0;

	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
out:
	kfree(ctx);
	return ret;
}
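
/* Example of the @ena_tc bitmap ice_vsi_cfg_tc() consumes: enabling TC 0
 * and TC 1 is BIT(0) | BIT(1) == 0x3 (a sketch; the VSI queues are expected
 * to be quiesced first, per the kernel-doc above):
 *
 *	err = ice_vsi_cfg_tc(vsi, BIT(0) | BIT(1));
 */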

/**
 * ice_update_ring_stats - Update ring statistics
 * @stats: stats to be updated
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 *
 * This function assumes that caller has acquired a u64_stats_sync lock.
 */
static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes)
{
	stats->bytes += bytes;
	stats->pkts += pkts;
}

/**
 * ice_update_tx_ring_stats - Update Tx ring specific counters
 * @tx_ring: ring to update
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 */
void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes)
{
	u64_stats_update_begin(&tx_ring->syncp);
	ice_update_ring_stats(&tx_ring->stats, pkts, bytes);
	u64_stats_update_end(&tx_ring->syncp);
}

/**
 * ice_update_rx_ring_stats - Update Rx ring specific counters
 * @rx_ring: ring to update
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 */
void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes)
{
	u64_stats_update_begin(&rx_ring->syncp);
	ice_update_ring_stats(&rx_ring->stats, pkts, bytes);
	u64_stats_update_end(&rx_ring->syncp);
}
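
/* Sketch of a completion path using the helpers above (counters are
 * hypothetical values accumulated while cleaning the ring):
 *
 *	u64 total_pkts = 0, total_bytes = 0;
 *	... accumulate per completed descriptor ...
 *	ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
 */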

/**
 * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
 * @sw: switch to check if its default forwarding VSI is free
 *
 * Return true if the default forwarding VSI is already being used, else returns
 * false signalling that it's available to use.
 */
bool ice_is_dflt_vsi_in_use(struct ice_sw *sw)
{
	return (sw->dflt_vsi && sw->dflt_vsi_ena);
}

/**
 * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI
 * @sw: switch for the default forwarding VSI to compare against
 * @vsi: VSI to compare against default forwarding VSI
 *
 * If this VSI passed in is the default forwarding VSI then return true, else
 * return false
 */
bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
{
	return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena);
}

/**
 * ice_set_dflt_vsi - set the default forwarding VSI
 * @sw: switch used to assign the default forwarding VSI
 * @vsi: VSI getting set as the default forwarding VSI on the switch
 *
 * If the VSI passed in is already the default VSI and it's enabled just return
 * success.
 *
 * If there is already a default VSI on the switch and it's enabled then return
 * -EEXIST since there can only be one default VSI per switch.
 *
 * Otherwise try to set the VSI passed in as the switch's default VSI and
 * return the result.
 */
int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
{
	struct device *dev;
	int status;

	if (!sw || !vsi)
		return -EINVAL;

	dev = ice_pf_to_dev(vsi->back);

	/* the VSI passed in is already the default VSI */
	if (ice_is_vsi_dflt_vsi(sw, vsi)) {
		dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n",
			vsi->vsi_num);
		return 0;
	}

	/* another VSI is already the default VSI for this switch */
	if (ice_is_dflt_vsi_in_use(sw)) {
		dev_err(dev, "Default forwarding VSI %d already in use, disable it and try again\n",
			sw->dflt_vsi->vsi_num);
		return -EEXIST;
	}

	status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX);
	if (status) {
		dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n",
			vsi->vsi_num, status);
		return status;
	}

	sw->dflt_vsi = vsi;
	sw->dflt_vsi_ena = true;

	return 0;
}

/**
 * ice_clear_dflt_vsi - clear the default forwarding VSI
 * @sw: switch used to clear the default VSI
 *
 * If the switch has no default VSI or it's not enabled then return error.
 *
 * Otherwise try to clear the default VSI and return the result.
 */
int ice_clear_dflt_vsi(struct ice_sw *sw)
{
	struct ice_vsi *dflt_vsi;
	struct device *dev;
	int status;

	if (!sw)
		return -EINVAL;

	dev = ice_pf_to_dev(sw->pf);

	dflt_vsi = sw->dflt_vsi;

	/* there is no default VSI configured */
	if (!ice_is_dflt_vsi_in_use(sw))
		return -ENODEV;

	status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false,
				  ICE_FLTR_RX);
	if (status) {
		dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n",
			dflt_vsi->vsi_num, status);
		return -EIO;
	}

	sw->dflt_vsi = NULL;
	sw->dflt_vsi_ena = false;

	return 0;
}
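
/* Pairing sketch for the default-forwarding helpers above (hypothetical
 * caller; pf->first_sw is the PF's switch structure):
 *
 *	if (!ice_set_dflt_vsi(pf->first_sw, vsi))
 *		... all unmatched switch traffic now reaches this VSI ...
 *	...
 *	ice_clear_dflt_vsi(pf->first_sw);
 */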

/**
 * ice_get_link_speed_mbps - get link speed in Mbps
 * @vsi: the VSI whose link speed is being queried
 *
 * Return current VSI link speed and 0 if the speed is unknown.
 */
int ice_get_link_speed_mbps(struct ice_vsi *vsi)
{
	switch (vsi->port_info->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_100GB:
		return SPEED_100000;
	case ICE_AQ_LINK_SPEED_50GB:
		return SPEED_50000;
	case ICE_AQ_LINK_SPEED_40GB:
		return SPEED_40000;
	case ICE_AQ_LINK_SPEED_25GB:
		return SPEED_25000;
	case ICE_AQ_LINK_SPEED_20GB:
		return SPEED_20000;
	case ICE_AQ_LINK_SPEED_10GB:
		return SPEED_10000;
	case ICE_AQ_LINK_SPEED_5GB:
		return SPEED_5000;
	case ICE_AQ_LINK_SPEED_2500MB:
		return SPEED_2500;
	case ICE_AQ_LINK_SPEED_1000MB:
		return SPEED_1000;
	case ICE_AQ_LINK_SPEED_100MB:
		return SPEED_100;
	case ICE_AQ_LINK_SPEED_10MB:
		return SPEED_10;
	case ICE_AQ_LINK_SPEED_UNKNOWN:
	default:
		return 0;
	}
}
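
/* The return value above is already an ethtool SPEED_* number, so callers
 * can report it directly; 0 means the speed is unknown (e.g. link down):
 *
 *	int speed = ice_get_link_speed_mbps(vsi);
 *
 *	if (!speed)
 *		... treat as unknown/link down ...
 */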

/**
 * ice_get_link_speed_kbps - get link speed in Kbps
 * @vsi: the VSI whose link speed is being queried
 *
 * Return current VSI link speed and 0 if the speed is unknown.
 */
int ice_get_link_speed_kbps(struct ice_vsi *vsi)
{
	int speed_mbps;

	speed_mbps = ice_get_link_speed_mbps(vsi);

	return speed_mbps * 1000;
}

/**
 * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate
 * @vsi: VSI to be configured
 * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit
 *
 * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit
 * profile, otherwise a non-zero value will force a minimum BW limit for the VSI
 * on TC 0.
 */
int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	int status;
	int speed;

	dev = ice_pf_to_dev(pf);
	if (!vsi->port_info) {
		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
			vsi->idx, vsi->type);
		return -EINVAL;
	}

	speed = ice_get_link_speed_kbps(vsi);
	if (min_tx_rate > (u64)speed) {
		dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
			min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
			speed);
		return -EINVAL;
	}

	/* Configure min BW for VSI limit */
	if (min_tx_rate) {
		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
						   ICE_MIN_BW, min_tx_rate);
		if (status) {
			dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n",
				min_tx_rate, ice_vsi_type_str(vsi->type),
				vsi->idx);
			return status;
		}

		dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n",
			min_tx_rate, ice_vsi_type_str(vsi->type));
	} else {
		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
							vsi->idx, 0,
							ICE_MIN_BW);
		if (status) {
			dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n",
				ice_vsi_type_str(vsi->type), vsi->idx);
			return status;
		}

		dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n",
			ice_vsi_type_str(vsi->type), vsi->idx);
	}

	return 0;
}

/**
 * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate
 * @vsi: VSI to be configured
 * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit
 *
 * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit
 * profile, otherwise a non-zero value will force a maximum BW limit for the VSI
 * on TC 0.
 */
int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate)
{
	struct ice_pf *pf = vsi->back;
	struct device *dev;
	int status;
	int speed;

	dev = ice_pf_to_dev(pf);
	if (!vsi->port_info) {
		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
			vsi->idx, vsi->type);
		return -EINVAL;
	}

	speed = ice_get_link_speed_kbps(vsi);
	if (max_tx_rate > (u64)speed) {
		dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
			speed);
		return -EINVAL;
	}

	/* Configure max BW for VSI limit */
	if (max_tx_rate) {
		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
						   ICE_MAX_BW, max_tx_rate);
		if (status) {
			dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n",
				max_tx_rate, ice_vsi_type_str(vsi->type),
				vsi->idx);
			return status;
		}

		dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n",
			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
	} else {
		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
							vsi->idx, 0,
							ICE_MAX_BW);
		if (status) {
			dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n",
				ice_vsi_type_str(vsi->type), vsi->idx);
			return status;
		}

		dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n",
			ice_vsi_type_str(vsi->type), vsi->idx);
	}

	return 0;
}
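
/* Caller sketch (hypothetical values): rates are given in Kbps, so a
 * user-visible 1 Gbps cap on TC 0 is passed as 1000000, and 0 clears it:
 *
 *	err = ice_set_max_bw_limit(vsi, 1000000);
 *	...
 *	err = ice_set_max_bw_limit(vsi, 0);
 */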

/**
 * ice_set_link - turn on/off physical link
 * @vsi: VSI to modify physical link on
 * @ena: turn on/off physical link
 */
int ice_set_link(struct ice_vsi *vsi, bool ena)
{
	struct device *dev = ice_pf_to_dev(vsi->back);
	struct ice_port_info *pi = vsi->port_info;
	struct ice_hw *hw = pi->hw;
	int status;

	if (vsi->type != ICE_VSI_PF)
		return -EINVAL;

	status = ice_aq_set_link_restart_an(pi, ena, NULL);

	/* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE.
	 * this is not a fatal error, so print a warning message and return
	 * a success code. Return an error if FW returns an error code other
	 * than ICE_AQ_RC_EMODE
	 */
	if (status == -EIO) {
		if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
			dev_warn(dev, "can't set link to %s, err %d aq_err %s. not fatal, continuing\n",
				 (ena ? "ON" : "OFF"), status,
				 ice_aq_str(hw->adminq.sq_last_status));
	} else if (status) {
		dev_err(dev, "can't set link to %s, err %d aq_err %s\n",
			(ena ? "ON" : "OFF"), status,
			ice_aq_str(hw->adminq.sq_last_status));
		return status;
	}

	return 0;
}
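
/* Caller sketch: only the PF VSI may toggle the physical link, e.g. taking
 * it down while the interface is administratively down:
 *
 *	err = ice_set_link(vsi, false);
 */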

/**
 * ice_vsi_add_vlan_zero - add VLAN 0 filter(s) for this VSI
 * @vsi: VSI used to add VLAN filters
 *
 * In Single VLAN Mode (SVM), single VLAN filters via ICE_SW_LKUP_VLAN are based
 * on the inner VLAN ID, so the VLAN TPID (i.e. 0x8100 or 0x88a8) doesn't
 * matter. In Double VLAN Mode (DVM), outer/single VLAN filters via
 * ICE_SW_LKUP_VLAN are based on the outer/single VLAN ID + VLAN TPID.
 *
 * For both modes add a VLAN 0 + no VLAN TPID filter to handle untagged traffic
 * when VLAN pruning is enabled. Also, this handles VLAN 0 priority tagged
 * traffic in SVM, since the VLAN TPID isn't part of filtering.
 *
 * If DVM is enabled then an explicit VLAN 0 + VLAN TPID filter needs to be
 * added to allow VLAN 0 priority tagged traffic in DVM, since the VLAN TPID is
 * part of filtering.
 */
int ice_vsi_add_vlan_zero(struct ice_vsi *vsi)
{
	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
	struct ice_vlan vlan;
	int err;

	vlan = ICE_VLAN(0, 0, 0);
	err = vlan_ops->add_vlan(vsi, &vlan);
	if (err && err != -EEXIST)
		return err;

	/* in SVM both VLAN 0 filters are identical */
	if (!ice_is_dvm_ena(&vsi->back->hw))
		return 0;

	vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
	err = vlan_ops->add_vlan(vsi, &vlan);
	if (err && err != -EEXIST)
		return err;

	return 0;
}

/**
 * ice_vsi_del_vlan_zero - delete VLAN 0 filter(s) for this VSI
 * @vsi: VSI used to delete VLAN filters
 *
 * Delete the VLAN 0 filters in the same manner that they were added in
 * ice_vsi_add_vlan_zero.
 */
int ice_vsi_del_vlan_zero(struct ice_vsi *vsi)
{
	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
	struct ice_vlan vlan;
	int err;

	vlan = ICE_VLAN(0, 0, 0);
	err = vlan_ops->del_vlan(vsi, &vlan);
	if (err && err != -EEXIST)
		return err;

	/* in SVM both VLAN 0 filters are identical */
	if (!ice_is_dvm_ena(&vsi->back->hw))
		return 0;

	vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
	err = vlan_ops->del_vlan(vsi, &vlan);
	if (err && err != -EEXIST)
		return err;

	return 0;
}
/**
 * ice_vsi_num_zero_vlans - get number of VLAN 0 filters based on VLAN mode
 * @vsi: VSI used to get the VLAN mode
 *
 * If DVM is enabled then 2 VLAN 0 filters are added, else if SVM is enabled
 * then 1 VLAN 0 filter is added. See ice_vsi_add_vlan_zero for more details.
 */
static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi)
{
#define ICE_DVM_NUM_ZERO_VLAN_FLTRS	2
#define ICE_SVM_NUM_ZERO_VLAN_FLTRS	1
	/* no VLAN 0 filter is created when a port VLAN is active */
	if (vsi->type == ICE_VSI_VF) {
		if (WARN_ON(!vsi->vf))
			return 0;

		if (ice_vf_is_port_vlan_ena(vsi->vf))
			return 0;
	}

	if (ice_is_dvm_ena(&vsi->back->hw))
		return ICE_DVM_NUM_ZERO_VLAN_FLTRS;
	else
		return ICE_SVM_NUM_ZERO_VLAN_FLTRS;
}

/**
 * ice_vsi_has_non_zero_vlans - check if VSI has any non-zero VLANs
 * @vsi: VSI used to determine if any non-zero VLANs have been added
 */
bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi)
{
	return (vsi->num_vlan > ice_vsi_num_zero_vlans(vsi));
}

/**
 * ice_vsi_num_non_zero_vlans - get the number of non-zero VLANs for this VSI
 * @vsi: VSI used to get the number of non-zero VLANs added
 */
u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi)
{
	return (vsi->num_vlan - ice_vsi_num_zero_vlans(vsi));
}

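/* Illustrative only: a hypothetical debug helper showing how the two
 * accounting helpers above relate. vsi->num_vlan includes the implicit
 * VLAN 0 filter(s) from ice_vsi_add_vlan_zero(), so the non-zero count
 * is the difference.
 */
static void ice_example_dbg_vlan_counts(struct ice_vsi *vsi)
{
	struct device *dev = ice_pf_to_dev(vsi->back);

	dev_dbg(dev, "VSI %d: %u non-zero VLAN(s), pruning %s\n",
		vsi->vsi_num, ice_vsi_num_non_zero_vlans(vsi),
		ice_vsi_has_non_zero_vlans(vsi) ? "useful" : "not needed");
}
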
/**
 * ice_is_feature_supported
 * @pf: pointer to the struct ice_pf instance
 * @f: feature enum to be checked
 *
 * returns true if feature is supported, false otherwise
 */
bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f)
{
	if (f < 0 || f >= ICE_F_MAX)
		return false;

	return test_bit(f, pf->features);
}

/**
 * ice_set_feature_support
 * @pf: pointer to the struct ice_pf instance
 * @f: feature enum to set
 */
static void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f)
{
	if (f < 0 || f >= ICE_F_MAX)
		return;

	set_bit(f, pf->features);
}

/**
 * ice_clear_feature_support
 * @pf: pointer to the struct ice_pf instance
 * @f: feature enum to clear
 */
void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f)
{
	if (f < 0 || f >= ICE_F_MAX)
		return;

	clear_bit(f, pf->features);
}

/**
 * ice_init_feature_support
 * @pf: pointer to the struct ice_pf instance
 *
 * called during init to set up supported features
 */
void ice_init_feature_support(struct ice_pf *pf)
{
	switch (pf->hw.device_id) {
	case ICE_DEV_ID_E810C_BACKPLANE:
	case ICE_DEV_ID_E810C_QSFP:
	case ICE_DEV_ID_E810C_SFP:
		ice_set_feature_support(pf, ICE_F_DSCP);
		if (ice_is_e810t(&pf->hw)) {
			ice_set_feature_support(pf, ICE_F_SMA_CTRL);
			if (ice_gnss_is_gps_present(&pf->hw))
				ice_set_feature_support(pf, ICE_F_GNSS);
		}
		break;
	default:
		break;
	}
}

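/* Illustrative only: once ice_init_feature_support() has populated
 * pf->features, callers gate optional paths on the bitmap. A minimal
 * hypothetical example for the DSCP feature, which per the switch above
 * is only set for E810C devices.
 */
static bool ice_example_can_offload_dscp(struct ice_pf *pf)
{
	return ice_is_feature_supported(pf, ICE_F_DSCP);
}
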
/**
 * ice_vsi_update_security - update security block in VSI
 * @vsi: pointer to VSI structure
 * @fill: function pointer to fill ctx
 */
int
ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *))
{
	struct ice_vsi_ctx ctx = { 0 };

	ctx.info = vsi->info;
	ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
	fill(&ctx);

	if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL))
		return -ENODEV;

	vsi->info = ctx.info;
	return 0;
}

/**
 * ice_vsi_ctx_set_antispoof - set antispoof function in VSI ctx
 * @ctx: pointer to VSI ctx structure
 */
void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx)
{
	ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
			       (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
				ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
}

/**
 * ice_vsi_ctx_clear_antispoof - clear antispoof function in VSI ctx
 * @ctx: pointer to VSI ctx structure
 */
void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx)
{
	ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF &
			       ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
				 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
}

/**
 * ice_vsi_ctx_set_allow_override - allow destination override on VSI
 * @ctx: pointer to VSI ctx structure
 */
void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx)
{
	ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
}

/**
 * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI
 * @ctx: pointer to VSI ctx structure
 */
void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
{
	ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
}

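/* Illustrative only: ice_vsi_update_security() takes one of the ctx
 * helpers above as the fill callback, so toggling a security property is
 * a single call. A hypothetical sketch enabling MAC antispoof plus Tx
 * VLAN pruning on a VSI; the helper name is an assumption.
 */
static int ice_example_enable_antispoof(struct ice_vsi *vsi)
{
	return ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
}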