2018-03-20 22:58:05 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/* Copyright (c) 2018, Intel Corporation. */
|
|
|
|
|
|
|
|
#ifndef _ICE_H_
|
|
|
|
#define _ICE_H_
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
2019-09-09 21:47:46 +08:00
|
|
|
#include <linux/firmware.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/compiler.h>
|
2018-03-20 22:58:09 +08:00
|
|
|
#include <linux/etherdevice.h>
|
2018-03-20 22:58:13 +08:00
|
|
|
#include <linux/skbuff.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/cpumask.h>
|
2018-03-20 22:58:16 +08:00
|
|
|
#include <linux/rtnetlink.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/if_vlan.h>
|
2018-03-20 22:58:13 +08:00
|
|
|
#include <linux/dma-mapping.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/pci.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/workqueue.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/aer.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/interrupt.h>
|
2018-03-20 22:58:16 +08:00
|
|
|
#include <linux/ethtool.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/timer.h>
|
2018-03-20 22:58:06 +08:00
|
|
|
#include <linux/delay.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/bitmap.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/log2.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <linux/ip.h>
|
2018-12-20 02:03:32 +08:00
|
|
|
#include <linux/sctp.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <linux/ipv6.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <linux/pkt_sched.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/if_bridge.h>
|
2019-09-09 21:47:42 +08:00
|
|
|
#include <linux/ctype.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <linux/bpf.h>
|
2018-09-20 08:42:55 +08:00
|
|
|
#include <linux/avf/virtchnl.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <net/ipv6.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <net/xdp_sock.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include "ice_devids.h"
|
|
|
|
#include "ice_type.h"
|
2018-03-20 22:58:10 +08:00
|
|
|
#include "ice_txrx.h"
|
2019-03-01 07:24:22 +08:00
|
|
|
#include "ice_dcb.h"
|
ice: Get switch config, scheduler config and device capabilities
This patch adds to the initialization flow by getting switch
configuration, scheduler configuration and device capabilities.
Switch configuration:
On boot, an L2 switch element is created in the firmware per physical
function. Each physical function is also mapped to a port, to which its
switch element is connected. In other words, this switch can be visualized
as an embedded vSwitch that can connect a physical function's virtual
station interfaces (VSIs) to the egress/ingress port. Egress/ingress
filters will be eventually created and applied on this switch element.
As part of the initialization flow, the driver gets configuration data
from this switch element and stores it.
Scheduler configuration:
The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
As part of the initialization flow, the driver queries and stores the
default scheduler configuration for the given physical function.
Device capabilities:
As part of initialization, the driver has to determine what the device is
capable of (ex. max queues, VSIs, etc). This information is obtained from
the firmware and stored by the driver.
CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-03-20 22:58:08 +08:00
|
|
|
#include "ice_switch.h"
|
2018-03-20 22:58:07 +08:00
|
|
|
#include "ice_common.h"
|
ice: Get switch config, scheduler config and device capabilities
This patch adds to the initialization flow by getting switch
configuration, scheduler configuration and device capabilities.
Switch configuration:
On boot, an L2 switch element is created in the firmware per physical
function. Each physical function is also mapped to a port, to which its
switch element is connected. In other words, this switch can be visualized
as an embedded vSwitch that can connect a physical function's virtual
station interfaces (VSIs) to the egress/ingress port. Egress/ingress
filters will be eventually created and applied on this switch element.
As part of the initialization flow, the driver gets configuration data
from this switch element and stores it.
Scheduler configuration:
The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
As part of the initialization flow, the driver queries and stores the
default scheduler configuration for the given physical function.
Device capabilities:
As part of initialization, the driver has to determine what the device is
capable of (ex. max queues, VSIs, etc). This information is obtained from
the firmware and stored by the driver.
CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-03-20 22:58:08 +08:00
|
|
|
#include "ice_sched.h"
|
2018-09-20 08:42:55 +08:00
|
|
|
#include "ice_virtchnl_pf.h"
|
2018-09-20 08:42:57 +08:00
|
|
|
#include "ice_sriov.h"
|
2019-11-05 01:38:56 +08:00
|
|
|
#include "ice_xsk.h"
|
2018-03-20 22:58:05 +08:00
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
/* Declared here, defined elsewhere; presumably the driver version string */
extern const char ice_drv_ver[];

#define ICE_BAR0		0

/* Descriptor ring sizing: required multiple, bounds and defaults */
#define ICE_REQ_DESC_MULTIPLE	32
#define ICE_MIN_NUM_DESC	64
#define ICE_MAX_NUM_DESC	8160
#define ICE_DFLT_MIN_RX_DESC	512
#define ICE_DFLT_NUM_TX_DESC	256
#define ICE_DFLT_NUM_RX_DESC	2048

#define ICE_DFLT_TRAFFIC_CLASS	BIT(0)

/* Size of struct ice_q_vector's name[] buffer: IFNAMSIZ plus 16 bytes */
#define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)

#define ICE_AQ_LEN		64
#define ICE_MBXSQ_LEN		64
#define ICE_MIN_MSIX		2
#define ICE_NO_VSI		0xffff

/* Queue mapping modes and per-mode scatter limits */
#define ICE_VSI_MAP_CONTIG	0
#define ICE_VSI_MAP_SCATTER	1
#define ICE_MAX_SCATTER_TXQS	16
#define ICE_MAX_SCATTER_RXQS	16

/* The max retry count is five times the base retry limit (i.e. 50) */
#define ICE_Q_WAIT_RETRY_LIMIT	10
#define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)

#define ICE_MAX_LG_RSS_QS	256

/* ICE_RES_MISC_VEC_ID is one below the valid bit, i.e. 0x7fff */
#define ICE_RES_VALID_BIT	0x8000
#define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)

#define ICE_INVAL_Q_INDEX	0xffff
#define ICE_INVAL_VFID		256

#define ICE_MAX_RESET_WAIT		20

#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) * 4)

#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)

#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
|
2018-03-20 22:58:11 +08:00
|
|
|
|
|
|
|
/* Shift @val into the UP<i> field of the VSI UP table and mask it to that
 * field. @i is token-pasted into the ICE_AQ_VSI_UP_TABLE_UP<i>_S (shift) and
 * ICE_AQ_VSI_UP_TABLE_UP<i>_M (mask) names, so it must be a literal for
 * which both macros exist.
 */
#define ICE_UP_TABLE_TRANSLATE(val, i) \
		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
|
|
|
|
|
2018-03-20 22:58:14 +08:00
|
|
|
/* Descriptor accessors: treat ring @R's desc pointer as an array of the
 * given descriptor type and yield the address of element @i.
 */
#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2018-03-20 22:58:18 +08:00
|
|
|
/* Macro for each VSI in a PF: @i runs from 0 up to (pf)->num_alloc_vsi - 1 */
#define ice_for_each_vsi(pf, i) \
	for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
|
|
|
|
|
2018-10-27 02:44:47 +08:00
|
|
|
/* Macros for each Tx/Rx ring in a VSI: iterate @i over the used queue
 * counts, (vsi)->num_txq and (vsi)->num_rxq respectively.
 */
#define ice_for_each_txq(vsi, i) \
	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)

#define ice_for_each_rxq(vsi, i) \
	for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
|
|
|
|
|
2018-10-27 02:44:47 +08:00
|
|
|
/* Macros for each allocated Tx/Rx ring whether used or not in a VSI.
 *
 * These iterate over (vsi)->alloc_txq / (vsi)->alloc_rxq rather than the
 * used counts. They were introduced so that ethtool statistics are reported
 * per allocated queue: the ethtool string table must not change size or
 * order over the lifetime of a netdevice, and the allocated queue count
 * does not change whereas the used count can.
 */
#define ice_for_each_alloc_txq(vsi, i) \
	for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)

#define ice_for_each_alloc_rxq(vsi, i) \
	for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
|
|
|
|
|
2018-12-20 02:03:30 +08:00
|
|
|
/* Macro for each q_vector in a VSI: @i runs over (vsi)->num_q_vectors */
#define ice_for_each_q_vector(vsi, i) \
	for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
|
|
|
|
|
2019-02-27 08:35:14 +08:00
|
|
|
/* Promiscuous-mode bit sets, built by OR-ing ICE_PROMISC_* flags.
 *
 * Note that the "UCAST" sets contain both the unicast and the multicast
 * Tx/Rx bits; the "MCAST" sets contain only the multicast bits. The
 * "_VLAN_" variants additionally include the VLAN Tx/Rx bits.
 */
#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
				ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)

#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
				     ICE_PROMISC_MCAST_TX | \
				     ICE_PROMISC_UCAST_RX | \
				     ICE_PROMISC_MCAST_RX | \
				     ICE_PROMISC_VLAN_TX  | \
				     ICE_PROMISC_VLAN_RX)

#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)

#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
				     ICE_PROMISC_MCAST_RX | \
				     ICE_PROMISC_VLAN_TX  | \
				     ICE_PROMISC_VLAN_RX)
|
|
|
|
|
2019-11-08 22:23:26 +08:00
|
|
|
/* Yield the address of the dev member embedded in @pf's pdev */
#define ice_pf_to_dev(pf) (&(((pf)->pdev)->dev))
|
|
|
|
|
2019-10-24 16:11:17 +08:00
|
|
|
struct ice_txq_meta {
|
|
|
|
u32 q_teid; /* Tx-scheduler element identifier */
|
|
|
|
u16 q_id; /* Entry in VSI's txq_map bitmap */
|
|
|
|
u16 q_handle; /* Relative index of Tx queue within TC */
|
|
|
|
u16 vsi_idx; /* VSI index that Tx queue belongs to */
|
|
|
|
u8 tc; /* TC number that Tx queue belongs to */
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_tc_info {
|
|
|
|
u16 qoffset;
|
2018-10-27 02:44:35 +08:00
|
|
|
u16 qcount_tx;
|
|
|
|
u16 qcount_rx;
|
|
|
|
u8 netdev_tc;
|
2018-03-20 22:58:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct ice_tc_cfg {
|
|
|
|
u8 numtc; /* Total number of enabled TCs */
|
2019-02-20 07:04:13 +08:00
|
|
|
u8 ena_tc; /* Tx map */
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_res_tracker {
|
|
|
|
u16 num_entries;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
u16 end;
|
2018-03-20 22:58:10 +08:00
|
|
|
u16 list[1];
|
|
|
|
};
|
|
|
|
|
2018-12-20 02:03:27 +08:00
|
|
|
struct ice_qs_cfg {
|
2019-02-20 07:04:12 +08:00
|
|
|
struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */
|
2018-12-20 02:03:27 +08:00
|
|
|
unsigned long *pf_map;
|
|
|
|
unsigned long pf_map_size;
|
|
|
|
unsigned int q_count;
|
|
|
|
unsigned int scatter_count;
|
|
|
|
u16 *vsi_map;
|
|
|
|
u16 vsi_map_offset;
|
|
|
|
u8 mapping_mode;
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_sw {
|
|
|
|
struct ice_pf *pf;
|
|
|
|
u16 sw_id; /* switch ID for this switch */
|
|
|
|
u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */
|
2019-12-12 19:12:55 +08:00
|
|
|
struct ice_vsi *dflt_vsi; /* default VSI for this switch */
|
|
|
|
u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */
|
2018-03-20 22:58:10 +08:00
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:05 +08:00
|
|
|
/* State bit positions. __ICE_STATE_NBITS sizes the state bitmap declared in
 * struct ice_vsi. Values are implicit, so inserting or reordering entries
 * renumbers every bit that follows.
 */
enum ice_state {
	__ICE_TESTING,
	__ICE_DOWN,
	__ICE_NEEDS_RESTART,
	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
	__ICE_DCBNL_DEVRESET,		/* set by dcbnl devreset */
	__ICE_PFR_REQ,			/* set by driver and peers */
	__ICE_CORER_REQ,		/* set by driver and peers */
	__ICE_GLOBR_REQ,		/* set by driver and peers */
	__ICE_CORER_RECV,		/* set by OICR handler */
	__ICE_GLOBR_RECV,		/* set by OICR handler */
	__ICE_EMPR_RECV,		/* set by OICR handler */
	__ICE_SUSPENDED,		/* set on module remove path */
	__ICE_RESET_FAILED,		/* set by reset/rebuild */
	/* When checking for the PF to be in a nominal operating state, the
	 * bits that are grouped at the beginning of the list need to be
	 * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will
	 * be checked. If you need to add a bit into consideration for nominal
	 * operating state, it must be added before
	 * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
	 * without appropriate consideration.
	 */
	__ICE_STATE_NOMINAL_CHECK_BITS,
	__ICE_ADMINQ_EVENT_PENDING,
	__ICE_MAILBOXQ_EVENT_PENDING,
	__ICE_MDD_EVENT_PENDING,
	__ICE_VFLR_EVENT_PENDING,
	__ICE_FLTR_OVERFLOW_PROMISC,
	__ICE_VF_DIS,
	__ICE_CFG_BUSY,
	__ICE_SERVICE_SCHED,
	__ICE_SERVICE_DIS,
	__ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
	__ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
	__ICE_STATE_NBITS		/* must be last */
};
|
|
|
|
|
2018-03-20 22:58:19 +08:00
|
|
|
/* Per-VSI flag bit positions; ICE_VSI_FLAG_NBITS sizes the flags bitmap
 * declared in struct ice_vsi.
 */
enum ice_vsi_flags {
	ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
	ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
	ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
	ICE_VSI_FLAG_PROMISC_CHANGED,
	ICE_VSI_FLAG_NBITS		/* must be last */
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/* struct that defines a VSI, associated with a dev */
|
|
|
|
struct ice_vsi {
|
|
|
|
struct net_device *netdev;
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_sw *vsw; /* switch this VSI is on */
|
|
|
|
struct ice_pf *back; /* back pointer to PF */
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_port_info *port_info; /* back pointer to port_info */
|
2018-10-27 02:44:47 +08:00
|
|
|
struct ice_ring **rx_rings; /* Rx ring array */
|
|
|
|
struct ice_ring **tx_rings; /* Tx ring array */
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_q_vector **q_vectors; /* q_vector array */
|
2018-03-20 22:58:13 +08:00
|
|
|
|
|
|
|
irqreturn_t (*irq_handler)(int irq, void *data);
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
u64 tx_linearize;
|
2018-03-20 22:58:11 +08:00
|
|
|
DECLARE_BITMAP(state, __ICE_STATE_NBITS);
|
2018-03-20 22:58:19 +08:00
|
|
|
DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
|
|
|
|
unsigned int current_netdev_flags;
|
2018-03-20 22:58:16 +08:00
|
|
|
u32 tx_restart;
|
|
|
|
u32 tx_busy;
|
|
|
|
u32 rx_buf_failed;
|
|
|
|
u32 rx_page_failed;
|
2018-03-20 22:58:11 +08:00
|
|
|
int num_q_vectors;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
int base_vector; /* IRQ base for OS reserved vectors */
|
2018-03-20 22:58:11 +08:00
|
|
|
enum ice_vsi_type type;
|
2018-10-27 02:44:46 +08:00
|
|
|
u16 vsi_num; /* HW (absolute) index of this VSI */
|
|
|
|
u16 idx; /* software index in pf->vsi[] */
|
2018-03-20 22:58:11 +08:00
|
|
|
|
2018-09-20 08:42:56 +08:00
|
|
|
s16 vf_id; /* VF ID for SR-IOV VSIs */
|
|
|
|
|
2019-04-17 01:21:24 +08:00
|
|
|
u16 ethtype; /* Ethernet protocol for pause frame */
|
|
|
|
|
2018-03-20 22:58:15 +08:00
|
|
|
/* RSS config */
|
|
|
|
u16 rss_table_size; /* HW RSS table size */
|
|
|
|
u16 rss_size; /* Allocated RSS queues */
|
|
|
|
u8 *rss_hkey_user; /* User configured hash keys */
|
|
|
|
u8 *rss_lut_user; /* User configured lookup table entries */
|
|
|
|
u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */
|
|
|
|
|
2018-03-20 22:58:13 +08:00
|
|
|
u16 max_frame;
|
|
|
|
u16 rx_buf_len;
|
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_aqc_vsi_props info; /* VSI properties */
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
/* VSI stats */
|
|
|
|
struct rtnl_link_stats64 net_stats;
|
|
|
|
struct ice_eth_stats eth_stats;
|
|
|
|
struct ice_eth_stats eth_stats_prev;
|
|
|
|
|
2018-03-20 22:58:19 +08:00
|
|
|
struct list_head tmp_sync_list; /* MAC filters to be synced */
|
|
|
|
struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
|
|
|
|
|
2019-04-17 01:24:35 +08:00
|
|
|
u8 irqs_ready:1;
|
|
|
|
u8 current_isup:1; /* Sync 'link up' logging */
|
|
|
|
u8 stat_offsets_loaded:1;
|
|
|
|
u8 vlan_ena:1;
|
2019-12-12 19:12:54 +08:00
|
|
|
u16 num_vlan;
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
/* queue information */
|
|
|
|
u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
|
|
|
u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
2019-08-02 16:25:21 +08:00
|
|
|
u16 *txq_map; /* index in pf->avail_txqs */
|
|
|
|
u16 *rxq_map; /* index in pf->avail_rxqs */
|
2018-03-20 22:58:11 +08:00
|
|
|
u16 alloc_txq; /* Allocated Tx queues */
|
|
|
|
u16 num_txq; /* Used Tx queues */
|
|
|
|
u16 alloc_rxq; /* Allocated Rx queues */
|
|
|
|
u16 num_rxq; /* Used Rx queues */
|
2019-11-08 22:23:29 +08:00
|
|
|
u16 req_txq; /* User requested Tx queues */
|
|
|
|
u16 req_rxq; /* User requested Rx queues */
|
2019-02-09 04:50:59 +08:00
|
|
|
u16 num_rx_desc;
|
|
|
|
u16 num_tx_desc;
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_tc_cfg tc_cfg;
|
2019-11-05 01:38:56 +08:00
|
|
|
struct bpf_prog *xdp_prog;
|
|
|
|
struct ice_ring **xdp_rings; /* XDP ring array */
|
|
|
|
u16 num_xdp_txq; /* Used XDP queues */
|
|
|
|
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
2019-11-05 01:38:56 +08:00
|
|
|
struct xdp_umem **xsk_umems;
|
|
|
|
u16 num_xsk_umems_used;
|
|
|
|
u16 num_xsk_umems;
|
2018-03-20 22:58:11 +08:00
|
|
|
} ____cacheline_internodealigned_in_smp;
|
|
|
|
|
|
|
|
/* struct that defines an interrupt vector */
|
|
|
|
struct ice_q_vector {
|
|
|
|
struct ice_vsi *vsi;
|
2019-02-20 07:04:05 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
u16 v_idx; /* index in the vsi->q_vector array. */
|
2019-03-01 07:25:59 +08:00
|
|
|
u16 reg_idx;
|
2018-10-27 02:44:47 +08:00
|
|
|
u8 num_ring_rx; /* total number of Rx rings in vector */
|
2019-02-20 07:04:05 +08:00
|
|
|
u8 num_ring_tx; /* total number of Tx rings in vector */
|
|
|
|
u8 itr_countdown; /* when 0 should adjust adaptive ITR */
|
2018-09-20 08:23:19 +08:00
|
|
|
/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
|
|
|
|
* value to the device
|
|
|
|
*/
|
|
|
|
u8 intrl;
|
2019-02-20 07:04:05 +08:00
|
|
|
|
|
|
|
struct napi_struct napi;
|
|
|
|
|
|
|
|
struct ice_ring_container rx;
|
|
|
|
struct ice_ring_container tx;
|
|
|
|
|
|
|
|
cpumask_t affinity_mask;
|
|
|
|
struct irq_affinity_notify affinity_notify;
|
|
|
|
|
|
|
|
char name[ICE_INT_NAME_STR_LEN];
|
2018-03-20 22:58:10 +08:00
|
|
|
} ____cacheline_internodealigned_in_smp;
|
|
|
|
|
|
|
|
/* PF-level flag bit positions. Values are implicit, so new flags must be
 * added before ICE_PF_FLAGS_NBITS, which counts the entries.
 */
enum ice_pf_flags {
	ICE_FLAG_FLTR_SYNC,
	ICE_FLAG_RSS_ENA,
	ICE_FLAG_SRIOV_ENA,
	ICE_FLAG_SRIOV_CAPABLE,
	ICE_FLAG_DCB_CAPABLE,
	ICE_FLAG_DCB_ENA,
	ICE_FLAG_ADV_FEATURES,
	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
	ICE_FLAG_NO_MEDIA,
	ICE_FLAG_FW_LLDP_AGENT,
	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
	ICE_FLAG_LEGACY_RX,
	ICE_FLAG_MDD_AUTO_RESET_VF,
	ICE_PF_FLAGS_NBITS		/* must be last */
};
|
|
|
|
|
2018-03-20 22:58:05 +08:00
|
|
|
struct ice_pf {
|
|
|
|
struct pci_dev *pdev;
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
|
|
|
|
/* OS reserved IRQ details */
|
2018-03-20 22:58:10 +08:00
|
|
|
struct msix_entry *msix_entries;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
struct ice_res_tracker *irq_tracker;
|
|
|
|
/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
|
|
|
|
* number of MSIX vectors needed for all SR-IOV VFs from the number of
|
|
|
|
* MSIX vectors allowed on this PF.
|
|
|
|
*/
|
|
|
|
u16 sriov_base_vector;
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_vsi **vsi; /* VSIs created by the driver */
|
|
|
|
struct ice_sw *first_sw; /* first switch created by firmware */
|
2018-09-20 08:42:55 +08:00
|
|
|
/* Virtchnl/SR-IOV config info */
|
|
|
|
struct ice_vf *vf;
|
|
|
|
int num_alloc_vfs; /* actual number of VFs allocated */
|
2018-09-20 08:42:54 +08:00
|
|
|
u16 num_vfs_supported; /* num VFs supported for this PF */
|
2020-02-28 02:14:53 +08:00
|
|
|
u16 num_qps_per_vf;
|
|
|
|
u16 num_msix_per_vf;
|
2020-02-14 05:31:16 +08:00
|
|
|
/* used to ratelimit the MDD event logging */
|
|
|
|
unsigned long last_printed_mdd_jiffies;
|
2018-03-20 22:58:05 +08:00
|
|
|
DECLARE_BITMAP(state, __ICE_STATE_NBITS);
|
2018-03-20 22:58:10 +08:00
|
|
|
DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
|
2019-08-02 16:25:21 +08:00
|
|
|
unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */
|
|
|
|
unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */
|
2018-03-20 22:58:10 +08:00
|
|
|
unsigned long serv_tmr_period;
|
|
|
|
unsigned long serv_tmr_prev;
|
|
|
|
struct timer_list serv_tmr;
|
|
|
|
struct work_struct serv_task;
|
|
|
|
struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
|
|
|
|
struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
|
2019-11-06 18:05:29 +08:00
|
|
|
struct mutex tc_mutex; /* lock to protect TC changes */
|
2018-03-20 22:58:05 +08:00
|
|
|
u32 msg_enable;
|
2018-03-20 22:58:15 +08:00
|
|
|
u32 hw_csum_rx_error;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
u32 oicr_idx; /* Other interrupt cause MSIX vector index */
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
|
2019-08-02 16:25:21 +08:00
|
|
|
u16 max_pf_txqs; /* Total Tx queues PF wide */
|
|
|
|
u16 max_pf_rxqs; /* Total Rx queues PF wide */
|
2018-03-20 22:58:10 +08:00
|
|
|
u32 num_lan_msix; /* Total MSIX vectors for base driver */
|
2019-02-20 07:04:13 +08:00
|
|
|
u16 num_lan_tx; /* num LAN Tx queues setup */
|
|
|
|
u16 num_lan_rx; /* num LAN Rx queues setup */
|
2018-03-20 22:58:10 +08:00
|
|
|
u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
|
|
|
|
u16 num_alloc_vsi;
|
2018-03-20 22:58:18 +08:00
|
|
|
u16 corer_count; /* Core reset count */
|
|
|
|
u16 globr_count; /* Global reset count */
|
|
|
|
u16 empr_count; /* EMP reset count */
|
|
|
|
u16 pfr_count; /* PF reset count */
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
struct ice_hw_port_stats stats;
|
|
|
|
struct ice_hw_port_stats stats_prev;
|
2018-03-20 22:58:05 +08:00
|
|
|
struct ice_hw hw;
|
2019-04-17 01:24:35 +08:00
|
|
|
u8 stat_prev_loaded:1; /* has previous stats been loaded */
|
2019-03-01 07:24:24 +08:00
|
|
|
#ifdef CONFIG_DCB
|
|
|
|
u16 dcbx_cap;
|
|
|
|
#endif /* CONFIG_DCB */
|
2018-08-09 21:29:53 +08:00
|
|
|
u32 tx_timeout_count;
|
|
|
|
unsigned long tx_timeout_last_recovery;
|
|
|
|
u32 tx_timeout_recovery_level;
|
2018-03-20 22:58:10 +08:00
|
|
|
char int_name[ICE_INT_NAME_STR_LEN];
|
2019-04-17 01:30:43 +08:00
|
|
|
u32 sw_int_count;
|
2018-03-20 22:58:05 +08:00
|
|
|
};
|
2018-03-20 22:58:10 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
/* Layout of the data returned by netdev_priv() for this driver's netdevs;
 * it carries only the VSI pointer.
 */
struct ice_netdev_priv {
	struct ice_vsi *vsi;
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/**
|
|
|
|
* ice_irq_dynamic_ena - Enable default interrupt generation settings
|
2019-02-20 07:04:13 +08:00
|
|
|
* @hw: pointer to HW struct
|
|
|
|
* @vsi: pointer to VSI struct, can be NULL
|
2018-03-20 22:58:13 +08:00
|
|
|
* @q_vector: pointer to q_vector, can be NULL
|
2018-03-20 22:58:10 +08:00
|
|
|
*/
|
2019-02-27 08:35:11 +08:00
|
|
|
static inline void
|
|
|
|
ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
|
|
|
|
struct ice_q_vector *q_vector)
|
2018-03-20 22:58:10 +08:00
|
|
|
{
|
2019-03-01 07:25:59 +08:00
|
|
|
u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
((struct ice_pf *)hw->back)->oicr_idx;
|
2018-03-20 22:58:10 +08:00
|
|
|
int itr = ICE_ITR_NONE;
|
|
|
|
u32 val;
|
|
|
|
|
|
|
|
/* clear the PBA here, as this function is meant to clean out all
|
|
|
|
* previous interrupts and enable the interrupt
|
|
|
|
*/
|
|
|
|
val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
|
|
|
|
(itr << GLINT_DYN_CTL_ITR_INDX_S);
|
2018-03-20 22:58:13 +08:00
|
|
|
if (vsi)
|
|
|
|
if (test_bit(__ICE_DOWN, vsi->state))
|
|
|
|
return;
|
2018-03-20 22:58:10 +08:00
|
|
|
wr32(hw, GLINT_DYN_CTL(vector), val);
|
|
|
|
}
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2019-09-09 21:47:46 +08:00
|
|
|
/**
|
|
|
|
* ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
|
|
|
|
* @netdev: pointer to the netdev struct
|
|
|
|
*/
|
|
|
|
static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
|
|
|
|
{
|
|
|
|
struct ice_netdev_priv *np = netdev_priv(netdev);
|
|
|
|
|
|
|
|
return np->vsi->back;
|
|
|
|
}
|
|
|
|
|
2019-11-05 01:38:56 +08:00
|
|
|
static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
|
|
|
|
{
|
|
|
|
return !!vsi->xdp_prog;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void ice_set_ring_xdp(struct ice_ring *ring)
|
|
|
|
{
|
|
|
|
ring->flags |= ICE_TX_FLAGS_RING_XDP;
|
|
|
|
}
|
|
|
|
|
2019-11-05 01:38:56 +08:00
|
|
|
/**
|
|
|
|
* ice_xsk_umem - get XDP UMEM bound to a ring
|
|
|
|
* @ring - ring to use
|
|
|
|
*
|
|
|
|
* Returns a pointer to xdp_umem structure if there is an UMEM present,
|
|
|
|
* NULL otherwise.
|
|
|
|
*/
|
|
|
|
static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
|
|
|
|
{
|
|
|
|
struct xdp_umem **umems = ring->vsi->xsk_umems;
|
2019-12-12 19:13:06 +08:00
|
|
|
u16 qid = ring->q_index;
|
2019-11-05 01:38:56 +08:00
|
|
|
|
|
|
|
if (ice_ring_is_xdp(ring))
|
|
|
|
qid -= ring->vsi->num_xdp_txq;
|
|
|
|
|
2019-12-12 19:13:06 +08:00
|
|
|
if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] ||
|
|
|
|
!ice_is_xdp_ena_vsi(ring->vsi))
|
2019-11-05 01:38:56 +08:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return umems[qid];
|
|
|
|
}
|
|
|
|
|
2019-03-01 07:26:01 +08:00
|
|
|
/**
|
2019-08-08 22:39:33 +08:00
|
|
|
* ice_get_main_vsi - Get the PF VSI
|
|
|
|
* @pf: PF instance
|
|
|
|
*
|
|
|
|
* returns pf->vsi[0], which by definition is the PF VSI
|
2019-03-01 07:26:01 +08:00
|
|
|
*/
|
2019-08-08 22:39:33 +08:00
|
|
|
static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
|
2019-03-01 07:26:01 +08:00
|
|
|
{
|
2019-08-08 22:39:33 +08:00
|
|
|
if (pf->vsi)
|
|
|
|
return pf->vsi[0];
|
2019-03-01 07:26:01 +08:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
|
|
|
|
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
|
2018-03-20 22:58:16 +08:00
|
|
|
void ice_set_ethtool_ops(struct net_device *netdev);
|
2019-09-09 21:47:46 +08:00
|
|
|
void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
|
2019-09-03 16:31:06 +08:00
|
|
|
u16 ice_get_avail_txq_count(struct ice_pf *pf);
|
|
|
|
u16 ice_get_avail_rxq_count(struct ice_pf *pf);
|
2019-11-08 22:23:29 +08:00
|
|
|
int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
|
2019-07-25 17:53:50 +08:00
|
|
|
void ice_update_vsi_stats(struct ice_vsi *vsi);
|
|
|
|
void ice_update_pf_stats(struct ice_pf *pf);
|
2018-03-20 22:58:16 +08:00
|
|
|
int ice_up(struct ice_vsi *vsi);
|
|
|
|
int ice_down(struct ice_vsi *vsi);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_vsi_cfg(struct ice_vsi *vsi);
|
|
|
|
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
|
2019-11-05 01:38:56 +08:00
|
|
|
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
|
|
|
|
int ice_destroy_xdp_rings(struct ice_vsi *vsi);
|
|
|
|
int
|
|
|
|
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
|
|
|
|
u32 flags);
|
2018-03-20 22:58:15 +08:00
|
|
|
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
|
|
|
|
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
|
|
|
|
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
|
2019-11-08 22:23:29 +08:00
|
|
|
int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
|
2018-03-20 22:58:16 +08:00
|
|
|
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_open(struct net_device *netdev);
|
|
|
|
int ice_stop(struct net_device *netdev);
|
2018-03-20 22:58:15 +08:00
|
|
|
|
2018-03-20 22:58:05 +08:00
|
|
|
#endif /* _ICE_H_ */
|