2018-03-20 22:58:05 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
/* Copyright (c) 2018, Intel Corporation. */
|
|
|
|
|
|
|
|
#ifndef _ICE_H_
|
|
|
|
#define _ICE_H_
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
2019-09-09 21:47:46 +08:00
|
|
|
#include <linux/firmware.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/compiler.h>
|
2018-03-20 22:58:09 +08:00
|
|
|
#include <linux/etherdevice.h>
|
2018-03-20 22:58:13 +08:00
|
|
|
#include <linux/skbuff.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/cpumask.h>
|
2018-03-20 22:58:16 +08:00
|
|
|
#include <linux/rtnetlink.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/if_vlan.h>
|
2018-03-20 22:58:13 +08:00
|
|
|
#include <linux/dma-mapping.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/pci.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/workqueue.h>
|
ice: implement device flash update via devlink
Use the newly added pldmfw library to implement device flash update for
the Intel ice networking device driver. This support uses the devlink
flash update interface.
The main parts of the flash include the Option ROM, the netlist module,
and the main NVM data. The PLDM firmware file contains modules for each
of these components.
Using the pldmfw library, the provided firmware file will be scanned for
the three major components, "fw.undi" for the Option ROM, "fw.mgmt" for
the main NVM module containing the primary device firmware, and
"fw.netlist" containing the netlist module.
The flash is separated into two banks, the active bank containing the
running firmware, and the inactive bank which we use for update. Each
module is updated in a staged process. First, the inactive bank is
erased, preparing the device for update. Second, the contents of the
component are copied to the inactive portion of the flash. After all
components are updated, the driver signals the device to switch the
active bank during the next EMP reset (which would usually occur during
the next reboot).
Although the firmware AdminQ interface does report an immediate status
for each command, the NVM erase and NVM write commands receive status
asynchronously. The driver must not continue writing until previous
erase and write commands have finished. The real status of the NVM
commands is returned over the receive AdminQ. Implement a simple
interface that uses a wait queue so that the main update thread can
sleep until the completion status is reported by firmware. For erasing
the inactive banks, this can take quite a while in practice.
To help visualize the process to the devlink application and other
applications based on the devlink netlink interface, status is reported
via the devlink_flash_update_status_notify. While we do report status
after each 4k block when writing, there is no real status we can report
during erasing. We simply must wait for the complete module erasure to
finish.
With this implementation, basic flash update for the ice hardware is
supported.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24 08:22:03 +08:00
|
|
|
#include <linux/wait.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/aer.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/interrupt.h>
|
2018-03-20 22:58:16 +08:00
|
|
|
#include <linux/ethtool.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/timer.h>
|
2018-03-20 22:58:06 +08:00
|
|
|
#include <linux/delay.h>
|
2018-03-20 22:58:05 +08:00
|
|
|
#include <linux/bitmap.h>
|
2018-03-20 22:58:11 +08:00
|
|
|
#include <linux/log2.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <linux/ip.h>
|
2018-12-20 02:03:32 +08:00
|
|
|
#include <linux/sctp.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <linux/ipv6.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <linux/pkt_sched.h>
|
2018-03-20 22:58:10 +08:00
|
|
|
#include <linux/if_bridge.h>
|
2019-09-09 21:47:42 +08:00
|
|
|
#include <linux/ctype.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <linux/bpf.h>
|
2021-10-13 02:31:03 +08:00
|
|
|
#include <linux/btf.h>
|
2021-05-20 22:37:51 +08:00
|
|
|
#include <linux/auxiliary_bus.h>
|
2018-09-20 08:42:55 +08:00
|
|
|
#include <linux/avf/virtchnl.h>
|
2020-05-12 09:01:46 +08:00
|
|
|
#include <linux/cpu_rmap.h>
|
ice: replace custom AIM algorithm with kernel's DIM library
The ice driver has support for adaptive interrupt moderation, an
algorithm for tuning the interrupt rate dynamically. This algorithm
is based on various assumptions about ring size, socket buffer size,
link speed, SKB overhead, ethernet frame overhead and more.
The Linux kernel has support for a dynamic interrupt moderation
algorithm known as "dimlib". Replace the custom driver-specific
implementation of dynamic interrupt moderation with the kernel's
algorithm.
The Intel hardware has a different hardware implementation than the
originators of the dimlib code had to work with, which requires the
driver to use a slightly different set of inputs for the actual
moderation values, while getting all the advice from dimlib of
better/worse, shift left or right.
The change made for this implementation is to use a pair of values
for each of the 5 "slots" that the dimlib moderation expects, and
the driver will program those pairs when dimlib recommends a slot to
use. The current implementation uses two tables, one for receive
and one for transmit, and the pairs of values in each slot set the
maximum delay of an interrupt and a maximum number of interrupts per
second (both expressed in microseconds).
There are two separate kinds of bugs fixed by using DIMLIB, one is
UDP single stream send was too slow, and the other is that 8K
ping-pong was going to the most aggressive moderation and has much
too high latency.
The overall result of using DIMLIB is that we meet or exceed our
performance expectations set based on the old algorithm.
Co-developed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2021-04-01 05:16:57 +08:00
|
|
|
#include <linux/dim.h>
|
2021-10-16 07:35:15 +08:00
|
|
|
#include <net/pkt_cls.h>
|
2021-10-16 07:35:17 +08:00
|
|
|
#include <net/tc_act/tc_mirred.h>
|
|
|
|
#include <net/tc_act/tc_gact.h>
|
|
|
|
#include <net/ip.h>
|
2020-03-12 09:58:15 +08:00
|
|
|
#include <net/devlink.h>
|
2018-03-20 22:58:15 +08:00
|
|
|
#include <net/ipv6.h>
|
2019-11-05 01:38:56 +08:00
|
|
|
#include <net/xdp_sock.h>
|
2020-11-02 17:37:27 +08:00
|
|
|
#include <net/xdp_sock_drv.h>
|
2020-05-07 00:32:30 +08:00
|
|
|
#include <net/geneve.h>
|
|
|
|
#include <net/gre.h>
|
|
|
|
#include <net/udp_tunnel.h>
|
|
|
|
#include <net/vxlan.h>
|
2021-03-03 02:12:06 +08:00
|
|
|
#if IS_ENABLED(CONFIG_DCB)
|
|
|
|
#include <scsi/iscsi_proto.h>
|
|
|
|
#endif /* CONFIG_DCB */
|
2018-03-20 22:58:05 +08:00
|
|
|
#include "ice_devids.h"
|
|
|
|
#include "ice_type.h"
|
2018-03-20 22:58:10 +08:00
|
|
|
#include "ice_txrx.h"
|
2019-03-01 07:24:22 +08:00
|
|
|
#include "ice_dcb.h"
|
ice: Get switch config, scheduler config and device capabilities
This patch adds to the initialization flow by getting switch
configuration, scheduler configuration and device capabilities.
Switch configuration:
On boot, an L2 switch element is created in the firmware per physical
function. Each physical function is also mapped to a port, to which its
switch element is connected. In other words, this switch can be visualized
as an embedded vSwitch that can connect a physical function's virtual
station interfaces (VSIs) to the egress/ingress port. Egress/ingress
filters will be eventually created and applied on this switch element.
As part of the initialization flow, the driver gets configuration data
from this switch element and stores it.
Scheduler configuration:
The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
As part of the initialization flow, the driver queries and stores the
default scheduler configuration for the given physical function.
Device capabilities:
As part of initialization, the driver has to determine what the device is
capable of (ex. max queues, VSIs, etc). This information is obtained from
the firmware and stored by the driver.
CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-03-20 22:58:08 +08:00
|
|
|
#include "ice_switch.h"
|
2018-03-20 22:58:07 +08:00
|
|
|
#include "ice_common.h"
|
2021-10-16 07:35:16 +08:00
|
|
|
#include "ice_flow.h"
|
ice: Get switch config, scheduler config and device capabilities
This patch adds to the initialization flow by getting switch
configuration, scheduler configuration and device capabilities.
Switch configuration:
On boot, an L2 switch element is created in the firmware per physical
function. Each physical function is also mapped to a port, to which its
switch element is connected. In other words, this switch can be visualized
as an embedded vSwitch that can connect a physical function's virtual
station interfaces (VSIs) to the egress/ingress port. Egress/ingress
filters will be eventually created and applied on this switch element.
As part of the initialization flow, the driver gets configuration data
from this switch element and stores it.
Scheduler configuration:
The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
As part of the initialization flow, the driver queries and stores the
default scheduler configuration for the given physical function.
Device capabilities:
As part of initialization, the driver has to determine what the device is
capable of (ex. max queues, VSIs, etc). This information is obtained from
the firmware and stored by the driver.
CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-03-20 22:58:08 +08:00
|
|
|
#include "ice_sched.h"
|
2021-05-20 22:37:50 +08:00
|
|
|
#include "ice_idc_int.h"
|
2018-09-20 08:42:55 +08:00
|
|
|
#include "ice_virtchnl_pf.h"
|
2018-09-20 08:42:57 +08:00
|
|
|
#include "ice_sriov.h"
|
2021-06-10 00:39:50 +08:00
|
|
|
#include "ice_ptp.h"
|
2020-05-12 09:01:40 +08:00
|
|
|
#include "ice_fdir.h"
|
2019-11-05 01:38:56 +08:00
|
|
|
#include "ice_xsk.h"
|
2020-05-12 09:01:46 +08:00
|
|
|
#include "ice_arfs.h"
|
2021-08-20 08:08:50 +08:00
|
|
|
#include "ice_repr.h"
|
2021-08-06 16:49:05 +08:00
|
|
|
#include "ice_eswitch.h"
|
2020-11-21 08:39:26 +08:00
|
|
|
#include "ice_lag.h"
|
2018-03-20 22:58:05 +08:00
|
|
|
|
|
|
|
#define ICE_BAR0 0
|
2018-03-20 22:58:11 +08:00
|
|
|
#define ICE_REQ_DESC_MULTIPLE 32
|
2019-04-17 01:34:56 +08:00
|
|
|
#define ICE_MIN_NUM_DESC 64
|
2018-09-20 08:23:11 +08:00
|
|
|
#define ICE_MAX_NUM_DESC 8160
|
2019-04-17 01:30:41 +08:00
|
|
|
#define ICE_DFLT_MIN_RX_DESC 512
|
2019-09-03 16:31:07 +08:00
|
|
|
#define ICE_DFLT_NUM_TX_DESC 256
|
|
|
|
#define ICE_DFLT_NUM_RX_DESC 2048
|
2019-02-09 04:50:59 +08:00
|
|
|
|
2018-03-20 22:58:17 +08:00
|
|
|
#define ICE_DFLT_TRAFFIC_CLASS BIT(0)
|
2018-03-20 22:58:10 +08:00
|
|
|
#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16)
|
2021-06-10 00:39:46 +08:00
|
|
|
#define ICE_AQ_LEN 192
|
2019-07-25 16:55:38 +08:00
|
|
|
#define ICE_MBXSQ_LEN 64
|
2021-06-10 00:39:46 +08:00
|
|
|
#define ICE_SBQ_LEN 64
|
2021-01-22 02:38:06 +08:00
|
|
|
#define ICE_MIN_LAN_TXRX_MSIX 1
|
|
|
|
#define ICE_MIN_LAN_OICR_MSIX 1
|
|
|
|
/* Sum of the two minimums defined above: ICE_MIN_LAN_TXRX_MSIX and
 * ICE_MIN_LAN_OICR_MSIX.
 */
#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
|
2021-03-09 11:08:03 +08:00
|
|
|
#define ICE_FDIR_MSIX 2
|
2021-05-20 22:37:49 +08:00
|
|
|
#define ICE_RDMA_NUM_AEQ_MSIX 4
|
|
|
|
#define ICE_MIN_RDMA_MSIX 2
|
2021-08-20 08:08:55 +08:00
|
|
|
#define ICE_ESWITCH_MSIX 1
|
2018-03-20 22:58:11 +08:00
|
|
|
#define ICE_NO_VSI 0xffff
|
|
|
|
#define ICE_VSI_MAP_CONTIG 0
|
|
|
|
#define ICE_VSI_MAP_SCATTER 1
|
|
|
|
#define ICE_MAX_SCATTER_TXQS 16
|
|
|
|
#define ICE_MAX_SCATTER_RXQS 16
|
2018-03-20 22:58:13 +08:00
|
|
|
#define ICE_Q_WAIT_RETRY_LIMIT 10
|
|
|
|
/* Five times ICE_Q_WAIT_RETRY_LIMIT */
#define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT)
|
2018-03-20 22:58:15 +08:00
|
|
|
#define ICE_MAX_LG_RSS_QS 256
|
2018-03-20 22:58:10 +08:00
|
|
|
#define ICE_RES_VALID_BIT 0x8000
|
|
|
|
/* One below ICE_RES_VALID_BIT, so the valid bit itself is never set in this ID */
#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1)
|
2021-05-20 22:37:49 +08:00
|
|
|
/* One below ICE_RES_MISC_VEC_ID, keeping the two IDs distinct */
#define ICE_RES_RDMA_VEC_ID (ICE_RES_MISC_VEC_ID - 1)
|
2021-03-09 11:08:03 +08:00
|
|
|
/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
|
2021-05-20 22:37:49 +08:00
|
|
|
#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1)
|
2018-03-20 22:58:11 +08:00
|
|
|
#define ICE_INVAL_Q_INDEX 0xffff
|
2018-08-09 21:29:50 +08:00
|
|
|
#define ICE_INVAL_VFID 256
|
2018-03-20 22:58:05 +08:00
|
|
|
|
2021-03-03 02:15:33 +08:00
|
|
|
#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */
|
2021-10-16 07:35:15 +08:00
|
|
|
|
|
|
|
#define ICE_CHNL_START_TC 1
|
|
|
|
|
2018-10-27 01:40:51 +08:00
|
|
|
#define ICE_MAX_RESET_WAIT 20
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
/* (VSIQF_HKEY_MAX_INDEX + 1) entries multiplied by 4.
 * NOTE(review): presumably 4 bytes per entry, giving a size in bytes - confirm.
 */
#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4)
|
|
|
|
|
2018-03-20 22:58:05 +08:00
|
|
|
#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
|
|
|
|
|
2019-11-05 01:38:56 +08:00
|
|
|
/* ICE_AQ_SET_MAC_FRAME_SIZE_MAX less ICE_ETH_PKT_HDR_PAD; both operands are
 * defined outside this header.
 */
#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
|
2018-03-20 22:58:11 +08:00
|
|
|
|
|
|
|
/* Shift (val) left by ICE_AQ_VSI_UP_TABLE_UP<i>_S and mask the result with
 * ICE_AQ_VSI_UP_TABLE_UP<i>_M. The (i) argument is token-pasted into both
 * names, so it must be a literal index rather than a variable or expression.
 */
#define ICE_UP_TABLE_TRANSLATE(val, i) \
|
|
|
|
(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
|
|
|
|
ICE_AQ_VSI_UP_TABLE_UP##i##_M)
|
|
|
|
|
2018-03-20 22:58:14 +08:00
|
|
|
/* Pointer to element (i) of (R)->desc viewed as an array of struct ice_tx_desc */
#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
|
2018-03-20 22:58:13 +08:00
|
|
|
/* Pointer to element (i) of (R)->desc viewed as an array of
 * union ice_32b_rx_flex_desc
 */
#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
|
2018-03-20 22:58:15 +08:00
|
|
|
/* Pointer to element (i) of (R)->desc viewed as an array of
 * struct ice_tx_ctx_desc
 */
#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
|
2020-05-12 09:01:42 +08:00
|
|
|
/* Pointer to element (i) of (R)->desc viewed as an array of
 * struct ice_fltr_desc
 */
#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2021-10-16 07:35:16 +08:00
|
|
|
/* Minimum BW limit is 500 Kbps for any scheduler node */
|
|
|
|
#define ICE_MIN_BW_LIMIT 500
|
|
|
|
/* User can specify BW in Kbit/Mbit/Gbit and the OS converts it to bytes.
|
|
|
|
* Use ICE_BW_KBPS_DIVISOR to convert the user-specified BW limit into Kbps.
|
|
|
|
*/
|
|
|
|
#define ICE_BW_KBPS_DIVISOR 125
|
|
|
|
|
2018-03-20 22:58:18 +08:00
|
|
|
/* Macro for each VSI in a PF */
|
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (pf)->num_alloc_vsi. The caller supplies the loop body and the index
 * variable.
 */
#define ice_for_each_vsi(pf, i) \
|
|
|
|
for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
|
|
|
|
|
2021-08-19 20:00:04 +08:00
|
|
|
/* Macros for each Tx/Xdp/Rx ring in a VSI */
|
2018-03-20 22:58:13 +08:00
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->num_txq. The caller supplies the loop body and the index variable.
 */
#define ice_for_each_txq(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
|
|
|
|
|
2021-08-19 20:00:04 +08:00
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->num_xdp_txq. The caller supplies the loop body and the index
 * variable.
 */
#define ice_for_each_xdp_txq(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->num_xdp_txq; (i)++)
|
|
|
|
|
2018-03-20 22:58:13 +08:00
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->num_rxq. The caller supplies the loop body and the index variable.
 */
#define ice_for_each_rxq(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
|
|
|
|
|
2018-10-27 02:44:47 +08:00
|
|
|
/* Macros for each allocated Tx/Rx ring whether used or not in a VSI */
|
ice: Report stats for allocated queues via ethtool stats
It is not safe to have the string table for statistics change order or
size over the lifetime of a given netdevice. This is because of the
nature of the 3-step process for obtaining stats. First, user space
performs a request for the size of the strings table. Second it performs
a separate request for the strings themselves, after allocating space
for the table. Third, it requests the stats themselves, also allocating
space for the table.
If the size decreased, there is potential to see garbage data or stats
values. In the worst case, we could potentially see stats values become
mis-aligned with their strings, so that it looks like a statistic is
being reported differently than it actually is.
Even worse, if the size increased, there is potential that the strings
table or stats table was not allocated large enough and the stats code
could access and write to memory it should not, potentially resulting in
undefined behavior and system crashes.
It isn't even safe if the size always changes under the RTNL lock. This
is because the calls take place over multiple user space commands, so it
is not possible to hold the RTNL lock for the entire duration of
obtaining strings and stats. Further, not all consumers of the ethtool
API are the user space ethtool program, and it is possible that one
assumes the strings will not change (valid under the current contract),
and thus only requests the stats values when requesting stats in a loop.
Finally, it's not possible in the general case to detect when the size
changes, because it is quite possible that one value which could impact
the stat size increased, while another decreased. This would result in
the same total number of stats, but reordering them so that stats no
longer line up with the strings they belong to. Since only size changes
aren't enough, we would need some sort of hash or token to determine
when the strings no longer match. This would require extending the
ethtool stats commands, but there is no more space in the relevant
structures.
The real solution to resolve this would be to add a completely new API
for stats, probably over netlink.
In the ice driver, the only thing impacting the stats that is not
constant is the number of queues. Instead of reporting stats for each
used queue, report stats for each allocated queue. We do not change the
number of queues allocated for a given netdevice, as we pass this into
the alloc_etherdev_mq() function to set the num_tx_queues and
num_rx_queues.
This resolves the potential bugs at the slight cost of displaying many
queue statistics which will not be activated.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-08-09 21:28:54 +08:00
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->alloc_txq, i.e. over allocated Tx queues rather than only the
 * num_txq ones in use.
 */
#define ice_for_each_alloc_txq(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
|
|
|
|
|
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->alloc_rxq, i.e. over allocated Rx queues rather than only the
 * num_rxq ones in use.
 */
#define ice_for_each_alloc_rxq(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
|
|
|
|
|
2018-12-20 02:03:30 +08:00
|
|
|
/* Loop header only: (i) counts from 0 up to, but not including,
 * (vsi)->num_q_vectors. The caller supplies the loop body and the index
 * variable.
 */
#define ice_for_each_q_vector(vsi, i) \
|
|
|
|
for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
|
|
|
|
|
2021-10-16 07:35:15 +08:00
|
|
|
/* Loop header only: (i) starts at ICE_CHNL_START_TC rather than 0 and stops
 * before ICE_CHNL_MAX_TC, which is defined outside this header.
 */
#define ice_for_each_chnl_tc(i) \
|
|
|
|
for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++)
|
|
|
|
|
2021-02-27 05:19:23 +08:00
|
|
|
/* Union of the unicast promiscuous Tx and Rx flags */
#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX)
|
2019-02-27 08:35:14 +08:00
|
|
|
|
|
|
|
/* Union of the unicast Tx/Rx promiscuous flags and the VLAN Tx/Rx
 * promiscuous flags
 */
#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
|
|
|
|
ICE_PROMISC_UCAST_RX | \
|
|
|
|
ICE_PROMISC_VLAN_TX | \
|
|
|
|
ICE_PROMISC_VLAN_RX)
|
|
|
|
|
|
|
|
/* Union of the multicast promiscuous Tx and Rx flags */
#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)
|
|
|
|
|
|
|
|
/* Union of the multicast Tx/Rx promiscuous flags and the VLAN Tx/Rx
 * promiscuous flags
 */
#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
|
|
|
|
ICE_PROMISC_MCAST_RX | \
|
|
|
|
ICE_PROMISC_VLAN_TX | \
|
|
|
|
ICE_PROMISC_VLAN_RX)
|
|
|
|
|
2019-11-08 22:23:26 +08:00
|
|
|
/* Address of the dev member reached through (pf)->pdev; dereferences both
 * (pf) and (pf)->pdev, so neither may be NULL.
 */
#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
|
|
|
|
|
2021-07-17 06:16:41 +08:00
|
|
|
/* Feature identifiers. Enumerators take consecutive values starting at 0, so
 * ICE_F_MAX equals the number of features listed before it and must stay
 * last.
 */
enum ice_feature {
|
|
|
|
ICE_F_DSCP,
|
2021-08-17 19:09:18 +08:00
|
|
|
ICE_F_SMA_CTRL,
|
2021-07-17 06:16:41 +08:00
|
|
|
ICE_F_MAX
|
|
|
|
};
|
|
|
|
|
2021-08-19 20:00:03 +08:00
|
|
|
/* Declaration of a static branch key whose initial state is false.
 * NOTE(review): the matching definition is not in this header - presumably a
 * DEFINE_STATIC_KEY_FALSE in one of the driver's .c files; confirm.
 */
DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key);
|
|
|
|
|
2021-10-16 07:35:15 +08:00
|
|
|
/* Per-channel bookkeeping. Holds list linkage (list), a queue range (base_q,
 * num_rxq, num_txq), a copy of VSI properties (info), Tx rate bounds
 * (min_tx_rate, max_tx_rate), an atomic filter counter (num_sb_fltr) and a
 * pointer to a VSI (ch_vsi).
 * NOTE(review): the field roles above are read from the names and types
 * only; the rate units and which list the 'list' member is linked on are not
 * visible in this header - confirm against the users.
 */
struct ice_channel {
|
|
|
|
struct list_head list;
|
|
|
|
u8 type;
|
|
|
|
u16 sw_id;
|
|
|
|
u16 base_q;
|
|
|
|
u16 num_rxq;
|
|
|
|
u16 num_txq;
|
|
|
|
u16 vsi_num;
|
|
|
|
u8 ena_tc;
|
|
|
|
struct ice_aqc_vsi_props info;
|
|
|
|
u64 max_tx_rate;
|
|
|
|
u64 min_tx_rate;
|
2021-12-30 02:54:33 +08:00
|
|
|
atomic_t num_sb_fltr;
|
2021-10-16 07:35:15 +08:00
|
|
|
struct ice_vsi *ch_vsi;
|
|
|
|
};
|
|
|
|
|
2019-10-24 16:11:17 +08:00
|
|
|
/* Identifying metadata for a single Tx queue: its scheduler element, its
 * slot in the owning VSI's queue map, and the VSI and TC it belongs to.
 */
struct ice_txq_meta {
|
|
|
|
u32 q_teid; /* Tx-scheduler element identifier */
|
|
|
|
u16 q_id; /* Entry in VSI's txq_map array */
|
|
|
|
u16 q_handle; /* Relative index of Tx queue within TC */
|
|
|
|
u16 vsi_idx; /* VSI index that Tx queue belongs to */
|
|
|
|
u8 tc; /* TC number that Tx queue belongs to */
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
/* Queue layout for one traffic class; struct ice_tc_cfg holds one of these
 * per TC in its tc_info[] array.
 * NOTE(review): qoffset is presumably the first queue of the TC, qcount_tx /
 * qcount_rx the number of Tx / Rx queues in it, and netdev_tc the TC number
 * as seen by the netdev - confirm against the code that fills this in.
 */
struct ice_tc_info {
|
|
|
|
u16 qoffset;
|
2018-10-27 02:44:35 +08:00
|
|
|
u16 qcount_tx;
|
|
|
|
u16 qcount_rx;
|
|
|
|
u8 netdev_tc;
|
2018-03-20 22:58:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Traffic class configuration: the number of enabled TCs, a map of enabled
 * TCs, and one struct ice_tc_info slot for each of the
 * ICE_MAX_TRAFFIC_CLASS possible TCs.
 */
struct ice_tc_cfg {
|
|
|
|
u8 numtc; /* Total number of enabled TCs */
|
2021-10-16 07:35:15 +08:00
|
|
|
u16 ena_tc; /* Tx map */
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/* Resource tracker. list[] is a flexible array member, so the structure has
 * to be allocated with room for its entries. Per the change description
 * embedded below, searches always start from index 0, and 'end' normally
 * equals num_entries but can be lowered at runtime to give up entries at the
 * tail of the tracker.
 */
struct ice_res_tracker {
|
|
|
|
u16 num_entries;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
u16 end;
|
2020-09-30 03:01:56 +08:00
|
|
|
u16 list[];
|
2018-03-20 22:58:10 +08:00
|
|
|
};
|
|
|
|
|
2018-12-20 02:03:27 +08:00
|
|
|
/* Parameter bundle describing a queue assignment request: a mutex pointer, a
 * PF-level bitmap and its size (pf_map, pf_map_size), the requested queue
 * count (q_count), a VSI-level u16 map with a starting offset (vsi_map,
 * vsi_map_offset) and a mapping mode.
 * NOTE(review): mapping_mode presumably takes ICE_VSI_MAP_CONTIG or
 * ICE_VSI_MAP_SCATTER, and scatter_count presumably bounds a scattered
 * assignment - confirm against the queue assignment code.
 */
struct ice_qs_cfg {
|
2019-02-20 07:04:12 +08:00
|
|
|
struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */
|
2018-12-20 02:03:27 +08:00
|
|
|
unsigned long *pf_map;
|
|
|
|
unsigned long pf_map_size;
|
|
|
|
unsigned int q_count;
|
|
|
|
unsigned int scatter_count;
|
|
|
|
u16 *vsi_map;
|
|
|
|
u16 vsi_map_offset;
|
|
|
|
u8 mapping_mode;
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/* Software view of one switch: a pointer to its PF, the switch ID and bridge
 * mode, and the optional default VSI together with a one-bit flag saying
 * whether that default VSI is enabled.
 */
struct ice_sw {
|
|
|
|
struct ice_pf *pf;
|
|
|
|
u16 sw_id; /* switch ID for this switch */
|
|
|
|
u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */
|
2019-12-12 19:12:55 +08:00
|
|
|
struct ice_vsi *dflt_vsi; /* default VSI for this switch */
|
|
|
|
u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */
|
2018-03-20 22:58:10 +08:00
|
|
|
};
|
|
|
|
|
2021-03-03 02:15:37 +08:00
|
|
|
/* PF state bit numbers. Enumerators take consecutive values starting at 0.
 * The order is significant in two places: entries placed before
 * ICE_STATE_NOMINAL_CHECK_BITS are the ones examined by the nominal-state
 * check (see the comment inside the enum), and ICE_STATE_NBITS must remain
 * the final entry so that it equals the number of state bits.
 */
enum ice_pf_state {
|
2021-03-03 02:15:38 +08:00
|
|
|
ICE_TESTING,
|
|
|
|
ICE_DOWN,
|
|
|
|
ICE_NEEDS_RESTART,
|
|
|
|
ICE_PREPARED_FOR_RESET, /* set by driver when prepared */
|
|
|
|
ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */
|
2021-05-20 22:37:50 +08:00
|
|
|
ICE_PFR_REQ, /* set by driver */
|
|
|
|
ICE_CORER_REQ, /* set by driver */
|
|
|
|
ICE_GLOBR_REQ, /* set by driver */
|
2021-03-03 02:15:38 +08:00
|
|
|
ICE_CORER_RECV, /* set by OICR handler */
|
|
|
|
ICE_GLOBR_RECV, /* set by OICR handler */
|
|
|
|
ICE_EMPR_RECV, /* set by OICR handler */
|
|
|
|
ICE_SUSPENDED, /* set on module remove path */
|
|
|
|
ICE_RESET_FAILED, /* set by reset/rebuild */
|
2018-09-20 08:42:55 +08:00
|
|
|
/* When checking for the PF to be in a nominal operating state, the
|
|
|
|
* bits that are grouped at the beginning of the list need to be
|
2021-03-03 02:15:38 +08:00
|
|
|
* checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will
|
2018-10-27 02:44:46 +08:00
|
|
|
* be checked. If you need to add a bit into consideration for nominal
|
2018-09-20 08:42:55 +08:00
|
|
|
* operating state, it must be added before
|
2021-03-03 02:15:38 +08:00
|
|
|
* ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
|
2018-09-20 08:42:55 +08:00
|
|
|
* without appropriate consideration.
|
|
|
|
*/
|
2021-03-03 02:15:38 +08:00
|
|
|
ICE_STATE_NOMINAL_CHECK_BITS,
|
|
|
|
ICE_ADMINQ_EVENT_PENDING,
|
|
|
|
ICE_MAILBOXQ_EVENT_PENDING,
|
2021-06-10 00:39:46 +08:00
|
|
|
ICE_SIDEBANDQ_EVENT_PENDING,
|
2021-03-03 02:15:38 +08:00
|
|
|
ICE_MDD_EVENT_PENDING,
|
|
|
|
ICE_VFLR_EVENT_PENDING,
|
|
|
|
ICE_FLTR_OVERFLOW_PROMISC,
|
|
|
|
ICE_VF_DIS,
|
2021-08-05 03:12:42 +08:00
|
|
|
ICE_VF_DEINIT_IN_PROGRESS,
|
2021-03-03 02:15:38 +08:00
|
|
|
ICE_CFG_BUSY,
|
|
|
|
ICE_SERVICE_SCHED,
|
|
|
|
ICE_SERVICE_DIS,
|
|
|
|
ICE_FD_FLUSH_REQ,
|
|
|
|
ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */
|
|
|
|
ICE_MDD_VF_PRINT_PENDING, /* set when an MDD event is handled */
|
|
|
|
ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */
|
|
|
|
ICE_LINK_DEFAULT_OVERRIDE_PENDING,
|
|
|
|
ICE_PHY_INIT_COMPLETE,
|
|
|
|
ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */
|
|
|
|
ICE_STATE_NBITS /* must be last */
|
2018-03-20 22:58:05 +08:00
|
|
|
};
|
|
|
|
|
2021-03-03 02:15:37 +08:00
|
|
|
/* Per-VSI state bit numbers. Enumerators take consecutive values starting at
 * 0, and ICE_VSI_STATE_NBITS must remain last because it sizes the 'state'
 * bitmap declared in struct ice_vsi.
 */
enum ice_vsi_state {
|
|
|
|
ICE_VSI_DOWN,
|
|
|
|
ICE_VSI_NEEDS_RESTART,
|
2021-03-03 02:15:41 +08:00
|
|
|
ICE_VSI_NETDEV_ALLOCD,
|
|
|
|
ICE_VSI_NETDEV_REGISTERED,
|
2021-03-03 02:15:37 +08:00
|
|
|
ICE_VSI_UMAC_FLTR_CHANGED,
|
|
|
|
ICE_VSI_MMAC_FLTR_CHANGED,
|
|
|
|
ICE_VSI_VLAN_FLTR_CHANGED,
|
|
|
|
ICE_VSI_PROMISC_CHANGED,
|
|
|
|
ICE_VSI_STATE_NBITS /* must be last */
|
2018-03-20 22:58:19 +08:00
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/* struct that defines a VSI, associated with a dev */
|
|
|
|
struct ice_vsi {
|
|
|
|
struct net_device *netdev;
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_sw *vsw; /* switch this VSI is on */
|
|
|
|
struct ice_pf *back; /* back pointer to PF */
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_port_info *port_info; /* back pointer to port_info */
|
2021-08-19 19:59:58 +08:00
|
|
|
struct ice_rx_ring **rx_rings; /* Rx ring array */
|
|
|
|
struct ice_tx_ring **tx_rings; /* Tx ring array */
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_q_vector **q_vectors; /* q_vector array */
|
2018-03-20 22:58:13 +08:00
|
|
|
|
|
|
|
irqreturn_t (*irq_handler)(int irq, void *data);
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
u64 tx_linearize;
|
2021-03-03 02:15:37 +08:00
|
|
|
DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS);
|
2018-03-20 22:58:19 +08:00
|
|
|
unsigned int current_netdev_flags;
|
2018-03-20 22:58:16 +08:00
|
|
|
u32 tx_restart;
|
|
|
|
u32 tx_busy;
|
|
|
|
u32 rx_buf_failed;
|
|
|
|
u32 rx_page_failed;
|
2020-05-08 08:41:05 +08:00
|
|
|
u16 num_q_vectors;
|
|
|
|
u16 base_vector; /* IRQ base for OS reserved vectors */
|
2018-03-20 22:58:11 +08:00
|
|
|
enum ice_vsi_type type;
|
2018-10-27 02:44:46 +08:00
|
|
|
u16 vsi_num; /* HW (absolute) index of this VSI */
|
|
|
|
u16 idx; /* software index in pf->vsi[] */
|
2018-03-20 22:58:11 +08:00
|
|
|
|
2018-09-20 08:42:56 +08:00
|
|
|
s16 vf_id; /* VF ID for SR-IOV VSIs */
|
|
|
|
|
2019-04-17 01:21:24 +08:00
|
|
|
u16 ethtype; /* Ethernet protocol for pause frame */
|
2020-05-12 09:01:40 +08:00
|
|
|
u16 num_gfltr;
|
|
|
|
u16 num_bfltr;
|
2019-04-17 01:21:24 +08:00
|
|
|
|
2018-03-20 22:58:15 +08:00
|
|
|
/* RSS config */
|
|
|
|
u16 rss_table_size; /* HW RSS table size */
|
|
|
|
u16 rss_size; /* Allocated RSS queues */
|
|
|
|
u8 *rss_hkey_user; /* User configured hash keys */
|
|
|
|
u8 *rss_lut_user; /* User configured lookup table entries */
|
|
|
|
u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */
|
|
|
|
|
2020-05-12 09:01:46 +08:00
|
|
|
/* aRFS members only allocated for the PF VSI */
|
|
|
|
#define ICE_MAX_ARFS_LIST 1024
|
|
|
|
#define ICE_ARFS_LST_MASK (ICE_MAX_ARFS_LIST - 1)
|
|
|
|
struct hlist_head *arfs_fltr_list;
|
|
|
|
struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
|
|
|
|
spinlock_t arfs_lock; /* protects aRFS hash table and filter state */
|
|
|
|
atomic_t *arfs_last_fltr_id;
|
|
|
|
|
2018-03-20 22:58:13 +08:00
|
|
|
u16 max_frame;
|
|
|
|
u16 rx_buf_len;
|
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_aqc_vsi_props info; /* VSI properties */
|
|
|
|
|
2018-03-20 22:58:16 +08:00
|
|
|
/* VSI stats */
|
|
|
|
struct rtnl_link_stats64 net_stats;
|
|
|
|
struct ice_eth_stats eth_stats;
|
|
|
|
struct ice_eth_stats eth_stats_prev;
|
|
|
|
|
2018-03-20 22:58:19 +08:00
|
|
|
struct list_head tmp_sync_list; /* MAC filters to be synced */
|
|
|
|
struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
|
|
|
|
|
2019-04-17 01:24:35 +08:00
|
|
|
u8 irqs_ready:1;
|
|
|
|
u8 current_isup:1; /* Sync 'link up' logging */
|
|
|
|
u8 stat_offsets_loaded:1;
|
2019-12-12 19:12:54 +08:00
|
|
|
u16 num_vlan;
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
/* queue information */
|
|
|
|
u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
|
|
|
u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
2019-08-02 16:25:21 +08:00
|
|
|
u16 *txq_map; /* index in pf->avail_txqs */
|
|
|
|
u16 *rxq_map; /* index in pf->avail_rxqs */
|
2018-03-20 22:58:11 +08:00
|
|
|
u16 alloc_txq; /* Allocated Tx queues */
|
|
|
|
u16 num_txq; /* Used Tx queues */
|
|
|
|
u16 alloc_rxq; /* Allocated Rx queues */
|
|
|
|
u16 num_rxq; /* Used Rx queues */
|
2019-11-08 22:23:29 +08:00
|
|
|
u16 req_txq; /* User requested Tx queues */
|
|
|
|
u16 req_rxq; /* User requested Rx queues */
|
2019-02-09 04:50:59 +08:00
|
|
|
u16 num_rx_desc;
|
|
|
|
u16 num_tx_desc;
|
2021-05-20 22:37:50 +08:00
|
|
|
u16 qset_handle[ICE_MAX_TRAFFIC_CLASS];
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_tc_cfg tc_cfg;
|
2019-11-05 01:38:56 +08:00
|
|
|
struct bpf_prog *xdp_prog;
|
2021-08-19 19:59:58 +08:00
|
|
|
struct ice_tx_ring **xdp_rings; /* XDP ring array */
|
2021-04-28 03:52:09 +08:00
|
|
|
unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
|
2019-11-05 01:38:56 +08:00
|
|
|
u16 num_xdp_txq; /* Used XDP queues */
|
|
|
|
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
|
2020-11-21 08:39:27 +08:00
|
|
|
|
2021-08-20 08:08:54 +08:00
|
|
|
struct net_device **target_netdevs;
|
|
|
|
|
2021-10-16 07:35:15 +08:00
|
|
|
struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
|
|
|
|
|
|
|
|
/* Channel Specific Fields */
|
|
|
|
struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC];
|
|
|
|
u16 cnt_q_avail;
|
|
|
|
u16 next_base_q; /* next queue to be used for channel setup */
|
|
|
|
struct list_head ch_list;
|
|
|
|
u16 num_chnl_rxq;
|
|
|
|
u16 num_chnl_txq;
|
|
|
|
u16 ch_rss_size;
|
2021-10-16 07:35:17 +08:00
|
|
|
u16 num_chnl_fltr;
|
2021-10-16 07:35:15 +08:00
|
|
|
/* store away RSS size info before configuring ADQ channels so that
|
|
|
|
* it can be used after tc-qdisc delete, to get back RSS settings as
|
|
|
|
* they were before
|
|
|
|
*/
|
|
|
|
u16 orig_rss_size;
|
|
|
|
/* this keeps track of all enabled TC with and without DCB
|
|
|
|
* and inclusive of ADQ, vsi->mqprio_qopt keeps track of queue
|
|
|
|
* information
|
|
|
|
*/
|
|
|
|
u8 all_numtc;
|
|
|
|
u16 all_enatc;
|
|
|
|
|
|
|
|
/* store away TC info, to be used for rebuild logic */
|
|
|
|
u8 old_numtc;
|
|
|
|
u16 old_ena_tc;
|
|
|
|
|
|
|
|
struct ice_channel *ch;
|
|
|
|
|
2020-11-21 08:39:27 +08:00
|
|
|
/* back reference to the aggregator node that this VSI
|
|
|
|
* corresponds to
|
|
|
|
*/
|
|
|
|
struct ice_agg_node *agg_node;
|
2018-03-20 22:58:11 +08:00
|
|
|
} ____cacheline_internodealigned_in_smp;
|
|
|
|
|
|
|
|
/* struct that defines an interrupt vector */
|
|
|
|
struct ice_q_vector {
|
|
|
|
struct ice_vsi *vsi;
|
2019-02-20 07:04:05 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
u16 v_idx; /* index in the vsi->q_vector array. */
|
2019-03-01 07:25:59 +08:00
|
|
|
u16 reg_idx;
|
2018-10-27 02:44:47 +08:00
|
|
|
u8 num_ring_rx; /* total number of Rx rings in vector */
|
2019-02-20 07:04:05 +08:00
|
|
|
u8 num_ring_tx; /* total number of Tx rings in vector */
|
ice: replace custom AIM algorithm with kernel's DIM library
The ice driver has support for adaptive interrupt moderation, an
algorithm for tuning the interrupt rate dynamically. This algorithm
is based on various assumptions about ring size, socket buffer size,
link speed, SKB overhead, ethernet frame overhead and more.
The Linux kernel has support for a dynamic interrupt moderation
algorithm known as "dimlib". Replace the custom driver-specific
implementation of dynamic interrupt moderation with the kernel's
algorithm.
The Intel hardware has a different hardware implementation than the
originators of the dimlib code had to work with, which requires the
driver to use a slightly different set of inputs for the actual
moderation values, while getting all the advice from dimlib of
better/worse, shift left or right.
The change made for this implementation is to use a pair of values
for each of the 5 "slots" that the dimlib moderation expects, and
the driver will program those pairs when dimlib recommends a slot to
use. The current implementation uses two tables, one for receive
and one for transmit, and the pairs of values in each slot set the
maximum delay of an interrupt and a maximum number of interrupts per
second (both expressed in microseconds).
There are two separate kinds of bugs fixed by using DIMLIB, one is
UDP single stream send was too slow, and the other is that 8K
ping-pong was going to the most aggressive moderation and has much
too high latency.
The overall result of using DIMLIB is that we meet or exceed our
performance expectations set based on the old algorithm.
Co-developed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2021-04-01 05:16:57 +08:00
|
|
|
u8 wb_on_itr:1; /* if true, WB on ITR is enabled */
|
2018-09-20 08:23:19 +08:00
|
|
|
/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
|
|
|
|
* value to the device
|
|
|
|
*/
|
|
|
|
u8 intrl;
|
2019-02-20 07:04:05 +08:00
|
|
|
|
|
|
|
struct napi_struct napi;
|
|
|
|
|
|
|
|
struct ice_ring_container rx;
|
|
|
|
struct ice_ring_container tx;
|
|
|
|
|
|
|
|
cpumask_t affinity_mask;
|
|
|
|
struct irq_affinity_notify affinity_notify;
|
|
|
|
|
2021-10-16 07:35:16 +08:00
|
|
|
struct ice_channel *ch;
|
|
|
|
|
2019-02-20 07:04:05 +08:00
|
|
|
char name[ICE_INT_NAME_STR_LEN];
|
ice: replace custom AIM algorithm with kernel's DIM library
The ice driver has support for adaptive interrupt moderation, an
algorithm for tuning the interrupt rate dynamically. This algorithm
is based on various assumptions about ring size, socket buffer size,
link speed, SKB overhead, ethernet frame overhead and more.
The Linux kernel has support for a dynamic interrupt moderation
algorithm known as "dimlib". Replace the custom driver-specific
implementation of dynamic interrupt moderation with the kernel's
algorithm.
The Intel hardware has a different hardware implementation than the
originators of the dimlib code had to work with, which requires the
driver to use a slightly different set of inputs for the actual
moderation values, while getting all the advice from dimlib of
better/worse, shift left or right.
The change made for this implementation is to use a pair of values
for each of the 5 "slots" that the dimlib moderation expects, and
the driver will program those pairs when dimlib recommends a slot to
use. The current implementation uses two tables, one for receive
and one for transmit, and the pairs of values in each slot set the
maximum delay of an interrupt and a maximum number of interrupts per
second (both expressed in microseconds).
There are two separate kinds of bugs fixed by using DIMLIB, one is
UDP single stream send was too slow, and the other is that 8K
ping-pong was going to the most aggressive moderation and has much
too high latency.
The overall result of using DIMLIB is that we meet or exceed our
performance expectations set based on the old algorithm.
Co-developed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2021-04-01 05:16:57 +08:00
|
|
|
|
|
|
|
u16 total_events; /* net_dim(): number of interrupts processed */
|
2018-03-20 22:58:10 +08:00
|
|
|
} ____cacheline_internodealigned_in_smp;
|
|
|
|
|
|
|
|
/* Bit positions for the per-PF 'flags' bitmap, which struct ice_pf declares
 * as DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS). Enumerators are implicitly
 * numbered from 0, so the final entry equals the number of flags defined
 * before it.
 */
enum ice_pf_flags {
|
|
|
|
ICE_FLAG_FLTR_SYNC,
|
2021-05-20 22:37:49 +08:00
|
|
|
ICE_FLAG_RDMA_ENA,
|
2018-03-20 22:58:10 +08:00
|
|
|
ICE_FLAG_RSS_ENA,
|
2018-09-20 08:42:55 +08:00
|
|
|
ICE_FLAG_SRIOV_ENA,
|
2018-09-20 08:42:54 +08:00
|
|
|
ICE_FLAG_SRIOV_CAPABLE,
|
2019-03-01 07:24:22 +08:00
|
|
|
ICE_FLAG_DCB_CAPABLE,
|
|
|
|
ICE_FLAG_DCB_ENA,
|
2020-05-12 09:01:40 +08:00
|
|
|
ICE_FLAG_FD_ENA,
|
2021-06-10 00:39:50 +08:00
|
|
|
ICE_FLAG_PTP_SUPPORTED, /* PTP is supported by NVM */
|
|
|
|
ICE_FLAG_PTP, /* PTP is enabled by software */
|
2021-05-20 22:37:49 +08:00
|
|
|
ICE_FLAG_AUX_ENA,
|
2019-09-09 21:47:46 +08:00
|
|
|
ICE_FLAG_ADV_FEATURES,
|
2021-10-16 07:35:15 +08:00
|
|
|
ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */
|
2021-08-06 16:49:05 +08:00
|
|
|
ICE_FLAG_CLS_FLOWER,
|
2018-12-20 02:03:26 +08:00
|
|
|
ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
|
2020-07-10 00:16:08 +08:00
|
|
|
ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
|
2019-06-26 17:20:17 +08:00
|
|
|
ICE_FLAG_NO_MEDIA,
|
2019-07-29 17:04:50 +08:00
|
|
|
ICE_FLAG_FW_LLDP_AGENT,
|
2021-05-06 23:40:01 +08:00
|
|
|
ICE_FLAG_MOD_POWER_UNSUPPORTED,
|
2021-10-14 00:02:19 +08:00
|
|
|
ICE_FLAG_PHY_FW_LOAD_FAILED,
|
2019-03-01 07:24:31 +08:00
|
|
|
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
|
2019-10-24 16:11:22 +08:00
|
|
|
ICE_FLAG_LEGACY_RX,
|
2020-05-08 08:40:59 +08:00
|
|
|
ICE_FLAG_VF_TRUE_PROMISC_ENA,
|
2020-02-14 05:31:16 +08:00
|
|
|
ICE_FLAG_MDD_AUTO_RESET_VF,
|
2020-07-10 00:16:07 +08:00
|
|
|
ICE_FLAG_LINK_LENIENT_MODE_ENA,
|
2018-03-20 22:58:10 +08:00
|
|
|
ICE_PF_FLAGS_NBITS /* must be last; sizes the 'flags' bitmap in struct ice_pf */
|
|
|
|
};
|
|
|
|
|
2021-08-20 08:08:54 +08:00
|
|
|
/* Switchdev bookkeeping: two VSI pointers plus a running flag. An instance
 * is embedded in struct ice_pf as the 'switchdev' member.
 */
struct ice_switchdev_info {
|
|
|
|
struct ice_vsi *control_vsi; /* NOTE(review): presumably the VSI carrying switchdev control traffic - confirm */
|
|
|
|
struct ice_vsi *uplink_vsi; /* NOTE(review): presumably the PF's uplink VSI - confirm */
|
|
|
|
bool is_running; /* NOTE(review): presumably true while switchdev mode is active - confirm */
|
|
|
|
};
|
|
|
|
|
2020-11-21 08:39:27 +08:00
|
|
|
/* Aggregator node record. struct ice_pf holds two fixed-size arrays of these
 * (pf_agg_node[] and vf_agg_node[]), and struct ice_vsi keeps a back
 * reference to one through its 'agg_node' pointer.
 */
struct ice_agg_node {
|
|
|
|
u32 agg_id; /* NOTE(review): presumably 0 (ICE_INVALID_AGG_NODE_ID) means unassigned - confirm */
|
|
|
|
#define ICE_MAX_VSIS_IN_AGG_NODE 64
|
|
|
|
u32 num_vsis; /* NOTE(review): presumably VSIs attached, capped by ICE_MAX_VSIS_IN_AGG_NODE - confirm */
|
|
|
|
u8 valid; /* NOTE(review): presumably non-zero once the node is in use - confirm */
|
|
|
|
};
|
|
|
|
|
2018-03-20 22:58:05 +08:00
|
|
|
struct ice_pf {
|
|
|
|
struct pci_dev *pdev;
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
|
2020-03-27 02:37:18 +08:00
|
|
|
struct devlink_region *nvm_region;
|
2021-10-12 08:41:10 +08:00
|
|
|
struct devlink_region *sram_region;
|
2020-06-19 02:46:11 +08:00
|
|
|
struct devlink_region *devcaps_region;
|
2020-03-27 02:37:18 +08:00
|
|
|
|
2021-08-20 08:08:49 +08:00
|
|
|
/* devlink port data */
|
|
|
|
struct devlink_port devlink_port;
|
|
|
|
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
/* OS reserved IRQ details */
|
2018-03-20 22:58:10 +08:00
|
|
|
struct msix_entry *msix_entries;
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has fewer entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
struct ice_res_tracker *irq_tracker;
|
|
|
|
/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
|
|
|
|
* number of MSIX vectors needed for all SR-IOV VFs from the number of
|
|
|
|
* MSIX vectors allowed on this PF.
|
|
|
|
*/
|
|
|
|
u16 sriov_base_vector;
|
ice: Split irq_tracker into sw_irq_tracker and hw_irq_tracker
For the PF driver, when mapping interrupts to queues, we need to request
IRQs from the kernel and we also have to allocate interrupts from
the device.
Similarly, when the VF driver (iavf.ko) initializes, it requests the kernel
IRQs that it needs but it can't directly allocate interrupts in the device.
Instead, it sends a mailbox message to the ice driver, which then allocates
interrupts in the device on the VF driver's behalf.
Currently both these cases end up having to reserve entries in
pf->irq_tracker but irq_tracker itself is sized based on how many vectors
the PF driver needs. Under the right circumstances, the VF driver can fail
to get entries in irq_tracker, which will result in the VF driver failing
probe.
To fix this, sw_irq_tracker and hw_irq_tracker are introduced. The
sw_irq_tracker tracks only the PF's IRQ request and doesn't play any
role in VF init. hw_irq_tracker represents the device's interrupt space.
When interrupts have to be allocated in the device for either PF or VF,
hw_irq_tracker will be looked up to see if the device has run out of
interrupts.
Signed-off-by: Preethi Banala <preethi.banala@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2018-09-20 08:23:16 +08:00
|
|
|
|
2020-05-12 09:01:40 +08:00
|
|
|
u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
struct ice_vsi **vsi; /* VSIs created by the driver */
|
|
|
|
struct ice_sw *first_sw; /* first switch created by firmware */
|
2021-08-20 08:08:48 +08:00
|
|
|
u16 eswitch_mode; /* current mode of eswitch */
|
2018-09-20 08:42:55 +08:00
|
|
|
/* Virtchnl/SR-IOV config info */
|
|
|
|
struct ice_vf *vf;
|
2020-05-08 08:41:06 +08:00
|
|
|
u16 num_alloc_vfs; /* actual number of VFs allocated */
|
2018-09-20 08:42:54 +08:00
|
|
|
u16 num_vfs_supported; /* num VFs supported for this PF */
|
2020-02-28 02:14:53 +08:00
|
|
|
u16 num_qps_per_vf;
|
|
|
|
u16 num_msix_per_vf;
|
2020-02-14 05:31:16 +08:00
|
|
|
/* used to ratelimit the MDD event logging */
|
|
|
|
unsigned long last_printed_mdd_jiffies;
|
2021-03-03 02:12:00 +08:00
|
|
|
DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT);
|
2021-07-17 06:16:41 +08:00
|
|
|
DECLARE_BITMAP(features, ICE_F_MAX);
|
2021-03-03 02:15:38 +08:00
|
|
|
DECLARE_BITMAP(state, ICE_STATE_NBITS);
|
2018-03-20 22:58:10 +08:00
|
|
|
DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
|
2019-08-02 16:25:21 +08:00
|
|
|
unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */
|
|
|
|
unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */
|
2018-03-20 22:58:10 +08:00
|
|
|
unsigned long serv_tmr_period;
|
|
|
|
unsigned long serv_tmr_prev;
|
|
|
|
struct timer_list serv_tmr;
|
|
|
|
struct work_struct serv_task;
|
|
|
|
struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
|
|
|
|
struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
|
2019-11-06 18:05:29 +08:00
|
|
|
struct mutex tc_mutex; /* lock to protect TC changes */
|
2018-03-20 22:58:05 +08:00
|
|
|
u32 msg_enable;
|
2021-06-10 00:39:50 +08:00
|
|
|
struct ice_ptp ptp;
|
2021-05-20 22:37:49 +08:00
|
|
|
u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */
|
|
|
|
u16 rdma_base_vector;
|
ice: implement device flash update via devlink
Use the newly added pldmfw library to implement device flash update for
the Intel ice networking device driver. This support uses the devlink
flash update interface.
The main parts of the flash include the Option ROM, the netlist module,
and the main NVM data. The PLDM firmware file contains modules for each
of these components.
Using the pldmfw library, the provided firmware file will be scanned for
the three major components, "fw.undi" for the Option ROM, "fw.mgmt" for
the main NVM module containing the primary device firmware, and
"fw.netlist" containing the netlist module.
The flash is separated into two banks, the active bank containing the
running firmware, and the inactive bank which we use for update. Each
module is updated in a staged process. First, the inactive bank is
erased, preparing the device for update. Second, the contents of the
component are copied to the inactive portion of the flash. After all
components are updated, the driver signals the device to switch the
active bank during the next EMP reset (which would usually occur during
the next reboot).
Although the firmware AdminQ interface does report an immediate status
for each command, the NVM erase and NVM write commands receive status
asynchronously. The driver must not continue writing until previous
erase and write commands have finished. The real status of the NVM
commands is returned over the receive AdminQ. Implement a simple
interface that uses a wait queue so that the main update thread can
sleep until the completion status is reported by firmware. For erasing
the inactive banks, this can take quite a while in practice.
To help visualize the process to the devlink application and other
applications based on the devlink netlink interface, status is reported
via the devlink_flash_update_status_notify. While we do report status
after each 4k block when writing, there is no real status we can report
during erasing. We simply must wait for the complete module erasure to
finish.
With this implementation, basic flash update for the ice hardware is
supported.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24 08:22:03 +08:00
|
|
|
|
|
|
|
/* spinlock to protect the AdminQ wait list */
|
|
|
|
spinlock_t aq_wait_lock;
|
|
|
|
struct hlist_head aq_wait_list;
|
|
|
|
wait_queue_head_t aq_wait_queue;
|
ice: support immediate firmware activation via devlink reload
The ice hardware contains an embedded chip with firmware which can be
updated using devlink flash. The firmware which runs on this chip is
referred to as the Embedded Management Processor firmware (EMP
firmware).
Activating the new firmware image currently requires that the system be
rebooted. This is not ideal as rebooting the system can cause unwanted
downtime.
In practical terms, activating the firmware does not always require a
full system reboot. In many cases it is possible to activate the EMP
firmware immediately. There are a couple of different scenarios to
cover.
* The EMP firmware itself can be reloaded by issuing a special update
to the device called an Embedded Management Processor reset (EMP
reset). This reset causes the device to reset and reload the EMP
firmware.
* PCI configuration changes are only reloaded after a cold PCIe reset.
Unfortunately there is no generic way to trigger this for a PCIe
device without a system reboot.
When performing a flash update, firmware is capable of responding with
some information about the specific update requirements.
The driver updates the flash by programming a secondary inactive bank
with the contents of the new image, and then issuing a command to
request to switch the active bank starting from the next load.
The response to the final command for updating the inactive NVM flash
bank includes an indication of the minimum reset required to fully
update the device. This can be one of the following:
* A full power on is required
* A cold PCIe reset is required
* An EMP reset is required
The response to the command to switch flash banks includes an indication
of whether or not the firmware will allow an EMP reset request.
For most updates, an EMP reset is sufficient to load the new EMP
firmware without issues. In some cases, this reset is not sufficient
because the PCI configuration space has changed. When this could cause
incompatibility with the new EMP image, the firmware is capable of
rejecting the EMP reset request.
Add logic to ice_fw_update.c to handle the response data from the flash update
AdminQ commands.
For the reset level, issue a devlink status notification informing the
user of how to complete the update with a simple suggestion like
"Activate new firmware by rebooting the system".
Cache the status of whether or not firmware will restrict the EMP reset
for use in implementing devlink reload.
Implement support for devlink reload with the "fw_activate" flag. This
allows user space to request the firmware be activated immediately.
For the .reload_down handler, we will issue a request for the EMP reset
using the appropriate firmware AdminQ command. If we know that the
firmware will not allow an EMP reset, simply exit with a suitable
netlink extended ACK message indicating that the EMP reset is not
available.
For the .reload_up handler, simply wait until the driver has finished
resetting. Logic to handle processing of an EMP reset already exists in
the driver as part of its reset and rebuild flows.
Implement support for the devlink reload interface with the
"fw_activate" action. This allows userspace to request activation of
firmware without a reboot.
Note that support for indicating the required reset and EMP reset
restriction is not supported on old versions of firmware. The driver can
determine if the two features are supported by checking the device
capabilities report. I confirmed support has existed since at least
version 5.5.2 as reported by the 'fw.mgmt' version. Support to issue the
EMP reset request has existed in all versions of the EMP firmware for the
ice hardware.
Check the device capabilities report to determine whether or not the
indications are reported by the running firmware. If the reset
requirement indication is not supported, always assume a full power on
is necessary. If the reset restriction capability is not supported,
always assume the EMP reset is available.
Users can verify if the EMP reset has activated the firmware by using
the devlink info report to check that the 'running' firmware version has
updated. For example a user might do the following:
# Check current version
$ devlink dev info
# Update the device
$ devlink dev flash pci/0000:af:00.0 file firmware.bin
# Confirm stored version updated
$ devlink dev info
# Reload to activate new firmware
$ devlink dev reload pci/0000:af:00.0 action fw_activate
# Confirm running version updated
$ devlink dev info
Finally, this change does *not* implement basic driver-only reload
support. I did look into trying to do this. However, it requires
a significant refactor of how the ice driver probes and loads everything.
The ice driver probe and allocation flows were not designed with such
a reload in mind. Refactoring the flow to support this is beyond the
scope of this change.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2021-10-28 07:22:55 +08:00
|
|
|
bool fw_emp_reset_disabled;
|
ice: implement device flash update via devlink
Use the newly added pldmfw library to implement device flash update for
the Intel ice networking device driver. This support uses the devlink
flash update interface.
The main parts of the flash include the Option ROM, the netlist module,
and the main NVM data. The PLDM firmware file contains modules for each
of these components.
Using the pldmfw library, the provided firmware file will be scanned for
the three major components, "fw.undi" for the Option ROM, "fw.mgmt" for
the main NVM module containing the primary device firmware, and
"fw.netlist" containing the netlist module.
The flash is separated into two banks, the active bank containing the
running firmware, and the inactive bank which we use for update. Each
module is updated in a staged process. First, the inactive bank is
erased, preparing the device for update. Second, the contents of the
component are copied to the inactive portion of the flash. After all
components are updated, the driver signals the device to switch the
active bank during the next EMP reset (which would usually occur during
the next reboot).
Although the firmware AdminQ interface does report an immediate status
for each command, the NVM erase and NVM write commands receive status
asynchronously. The driver must not continue writing until previous
erase and write commands have finished. The real status of the NVM
commands is returned over the receive AdminQ. Implement a simple
interface that uses a wait queue so that the main update thread can
sleep until the completion status is reported by firmware. For erasing
the inactive banks, this can take quite a while in practice.
To help visualize the process to the devlink application and other
applications based on the devlink netlink interface, status is reported
via the devlink_flash_update_status_notify. While we do report status
after each 4k block when writing, there is no real status we can report
during erasing. We simply must wait for the complete module erasure to
finish.
With this implementation, basic flash update for the ice hardware is
supported.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24 08:22:03 +08:00
|
|
|
|
2021-05-06 23:39:59 +08:00
|
|
|
wait_queue_head_t reset_wait_queue;
|
|
|
|
|
2018-03-20 22:58:15 +08:00
|
|
|
u32 hw_csum_rx_error;
|
2020-05-08 08:41:05 +08:00
|
|
|
u16 oicr_idx; /* Other interrupt cause MSIX vector index */
|
|
|
|
u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
|
2019-08-02 16:25:21 +08:00
|
|
|
u16 max_pf_txqs; /* Total Tx queues PF wide */
|
|
|
|
u16 max_pf_rxqs; /* Total Rx queues PF wide */
|
2020-05-08 08:41:05 +08:00
|
|
|
u16 num_lan_msix; /* Total MSIX vectors for base driver */
|
2019-02-20 07:04:13 +08:00
|
|
|
u16 num_lan_tx; /* num LAN Tx queues setup */
|
|
|
|
u16 num_lan_rx; /* num LAN Rx queues setup */
|
2018-03-20 22:58:10 +08:00
|
|
|
u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */
|
|
|
|
u16 num_alloc_vsi;
|
2018-03-20 22:58:18 +08:00
|
|
|
u16 corer_count; /* Core reset count */
|
|
|
|
u16 globr_count; /* Global reset count */
|
|
|
|
u16 empr_count; /* EMP reset count */
|
|
|
|
u16 pfr_count; /* PF reset count */
|
|
|
|
|
2020-07-10 00:16:03 +08:00
|
|
|
u8 wol_ena : 1; /* software state of WoL */
|
|
|
|
u32 wakeup_reason; /* last wakeup reason */
|
2018-03-20 22:58:16 +08:00
|
|
|
struct ice_hw_port_stats stats;
|
|
|
|
struct ice_hw_port_stats stats_prev;
|
2018-03-20 22:58:05 +08:00
|
|
|
struct ice_hw hw;
|
2019-04-17 01:24:35 +08:00
|
|
|
u8 stat_prev_loaded:1; /* has previous stats been loaded */
|
2021-10-19 07:16:02 +08:00
|
|
|
u8 rdma_mode;
|
2019-03-01 07:24:24 +08:00
|
|
|
u16 dcbx_cap;
|
2018-08-09 21:29:53 +08:00
|
|
|
u32 tx_timeout_count;
|
|
|
|
unsigned long tx_timeout_last_recovery;
|
|
|
|
u32 tx_timeout_recovery_level;
|
2018-03-20 22:58:10 +08:00
|
|
|
char int_name[ICE_INT_NAME_STR_LEN];
|
2021-05-20 22:37:49 +08:00
|
|
|
struct auxiliary_device *adev;
|
|
|
|
int aux_idx;
|
2019-04-17 01:30:43 +08:00
|
|
|
u32 sw_int_count;
|
2021-10-16 07:35:17 +08:00
|
|
|
/* count of tc_flower filters specific to channel (aka where filter
|
|
|
|
* action is "hw_tc <tc_num>")
|
|
|
|
*/
|
|
|
|
u16 num_dmac_chnl_fltrs;
|
2021-08-06 16:49:05 +08:00
|
|
|
struct hlist_head tc_flower_fltr_list;
|
|
|
|
|
2020-07-10 00:16:06 +08:00
|
|
|
__le64 nvm_phy_type_lo; /* NVM PHY type low */
|
|
|
|
__le64 nvm_phy_type_hi; /* NVM PHY type high */
|
2020-07-10 00:16:07 +08:00
|
|
|
struct ice_link_default_override_tlv link_dflt_override;
|
2020-11-21 08:39:26 +08:00
|
|
|
struct ice_lag *lag; /* Link Aggregation information */
|
2020-11-21 08:39:27 +08:00
|
|
|
|
2021-08-20 08:08:54 +08:00
|
|
|
struct ice_switchdev_info switchdev;
|
|
|
|
|
2020-11-21 08:39:27 +08:00
|
|
|
#define ICE_INVALID_AGG_NODE_ID 0
|
|
|
|
#define ICE_PF_AGG_NODE_ID_START 1
|
|
|
|
#define ICE_MAX_PF_AGG_NODES 32
|
|
|
|
struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES];
|
|
|
|
#define ICE_VF_AGG_NODE_ID_START 65
|
|
|
|
#define ICE_MAX_VF_AGG_NODES 32
|
|
|
|
struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
|
2018-03-20 22:58:05 +08:00
|
|
|
};
|
2018-03-20 22:58:10 +08:00
|
|
|
|
2018-03-20 22:58:11 +08:00
|
|
|
struct ice_netdev_priv {
|
|
|
|
struct ice_vsi *vsi;
|
2021-08-20 08:08:50 +08:00
|
|
|
struct ice_repr *repr;
|
2021-10-13 02:31:03 +08:00
|
|
|
/* indirect block callbacks on registered higher level devices
|
|
|
|
* (e.g. tunnel devices)
|
|
|
|
*
|
|
|
|
* tc_indr_block_cb_priv_list is used to look up indirect callback
|
|
|
|
* private data
|
|
|
|
*/
|
|
|
|
struct list_head tc_indr_block_priv_list;
|
2018-03-20 22:58:11 +08:00
|
|
|
};
|
|
|
|
|
2021-10-16 07:35:16 +08:00
|
|
|
/**
|
|
|
|
* ice_vector_ch_enabled
|
|
|
|
* @qv: pointer to q_vector, can be NULL
|
|
|
|
*
|
|
|
|
* This function returns true if vector is channel enabled otherwise false
|
|
|
|
*/
|
|
|
|
static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv)
|
|
|
|
{
|
|
|
|
return !!qv->ch; /* Enable it to run with TC */
|
|
|
|
}
|
|
|
|
|
2018-03-20 22:58:10 +08:00
|
|
|
/**
|
|
|
|
* ice_irq_dynamic_ena - Enable default interrupt generation settings
|
2019-02-20 07:04:13 +08:00
|
|
|
* @hw: pointer to HW struct
|
|
|
|
* @vsi: pointer to VSI struct, can be NULL
|
2018-03-20 22:58:13 +08:00
|
|
|
* @q_vector: pointer to q_vector, can be NULL
|
2018-03-20 22:58:10 +08:00
|
|
|
*/
|
2019-02-27 08:35:11 +08:00
|
|
|
static inline void
|
|
|
|
ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
|
|
|
|
struct ice_q_vector *q_vector)
|
2018-03-20 22:58:10 +08:00
|
|
|
{
|
2019-03-01 07:25:59 +08:00
|
|
|
u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
|
ice: Refactor interrupt tracking
Currently we have two MSI-x (IRQ) trackers, one for OS requested MSI-x
entries (sw_irq_tracker) and one for hardware MSI-x vectors
(hw_irq_tracker). Generally the sw_irq_tracker has less entries than the
hw_irq_tracker because the hw_irq_tracker has entries equal to the max
allowed MSI-x per PF and the sw_irq_tracker is mainly the minimum (non
SR-IOV portion of the vectors, kernel granted IRQs). All of the non
SR-IOV portions of the driver (i.e. LAN queues, RDMA queues, OICR, etc.)
take at least one of each type of tracker resource. SR-IOV only grabs
entries from the hw_irq_tracker. There are a few issues with this approach
that can be seen when doing any kind of device reconfiguration (i.e.
ethtool -L, SR-IOV, etc.). One of them being, any time the driver creates
an ice_q_vector and associates it to a LAN queue pair it will grab and
use one entry from the hw_irq_tracker and one from the sw_irq_tracker.
If the indices on these does not match it will cause a Tx timeout, which
will cause a reset and then the indices will match up again and traffic
will resume. The mismatched indices come from the trackers not being the
same size and/or the search_hint in the two trackers not being equal.
Another reason for the refactor is the co-existence of features with
SR-IOV. If SR-IOV is enabled and the interrupts are taken from the end
of the sw_irq_tracker then other features can no longer use this space
because the hardware has now given the remaining interrupts to SR-IOV.
This patch reworks how we track MSI-x vectors by removing the
hw_irq_tracker completely and instead MSI-x resources needed for SR-IOV
are determined all at once instead of per VF. This can be done because
when creating VFs we know how many are wanted and how many MSI-x vectors
each VF needs. This also allows us to start using MSI-x resources from
the end of the PF's allowed MSI-x vectors so we are less likely to use
entries needed for other features (i.e. RDMA, L2 Offload, etc).
This patch also reworks the ice_res_tracker structure by removing the
search_hint and adding a new member - "end". Instead of having a
search_hint we will always search from 0. The new member, "end", will be
used to manipulate the end of the ice_res_tracker (specifically
sw_irq_tracker) during runtime based on MSI-x vectors needed by SR-IOV.
In the normal case, the end of ice_res_tracker will be equal to the
ice_res_tracker's num_entries.
The sriov_base_vector member was added to the PF structure. It is used
to represent the starting MSI-x index of all the needed MSI-x vectors
for all SR-IOV VFs. Depending on how many MSI-x are needed, SR-IOV may
have to take resources from the sw_irq_tracker. This is done by setting
the sw_irq_tracker->end equal to the pf->sriov_base_vector. When all
SR-IOV VFs are removed then the sw_irq_tracker->end is reset back to
sw_irq_tracker->num_entries. The sriov_base_vector, along with the VF's
number of MSI-x (pf->num_vf_msix), vf_id, and the base MSI-x index on
the PF (pf->hw.func_caps.common_cap.msix_vector_first_id), is used to
calculate the first HW absolute MSI-x index for each VF, which is used
to write to the VPINT_ALLOC[_PCI] and GLINT_VECT2FUNC registers to
program the VFs MSI-x PCI configuration bits. Also, the sriov_base_vector
is used along with VF's num_vf_msix, vf_id, and q_vector->v_idx to
determine the MSI-x register index (used for writing to GLINT_DYN_CTL)
within the PF's space.
Interrupt changes removed any references to hw_base_vector, hw_oicr_idx,
and hw_irq_tracker. Only sw_base_vector, sw_oicr_idx, and sw_irq_tracker
variables remain. Change all of these by removing the "sw_" prefix to
help avoid confusion with these variables and their use.
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2019-04-17 01:30:44 +08:00
|
|
|
((struct ice_pf *)hw->back)->oicr_idx;
|
2018-03-20 22:58:10 +08:00
|
|
|
int itr = ICE_ITR_NONE;
|
|
|
|
u32 val;
|
|
|
|
|
|
|
|
/* clear the PBA here, as this function is meant to clean out all
|
|
|
|
* previous interrupts and enable the interrupt
|
|
|
|
*/
|
|
|
|
val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
|
|
|
|
(itr << GLINT_DYN_CTL_ITR_INDX_S);
|
2018-03-20 22:58:13 +08:00
|
|
|
if (vsi)
|
2021-03-03 02:15:37 +08:00
|
|
|
if (test_bit(ICE_VSI_DOWN, vsi->state))
|
2018-03-20 22:58:13 +08:00
|
|
|
return;
|
2018-03-20 22:58:10 +08:00
|
|
|
wr32(hw, GLINT_DYN_CTL(vector), val);
|
|
|
|
}
|
2018-03-20 22:58:13 +08:00
|
|
|
|
2019-09-09 21:47:46 +08:00
|
|
|
/**
|
|
|
|
* ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
|
|
|
|
* @netdev: pointer to the netdev struct
|
|
|
|
*/
|
|
|
|
static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
|
|
|
|
{
|
|
|
|
struct ice_netdev_priv *np = netdev_priv(netdev);
|
|
|
|
|
|
|
|
return np->vsi->back;
|
|
|
|
}
|
|
|
|
|
2019-11-05 01:38:56 +08:00
|
|
|
static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
|
|
|
|
{
|
|
|
|
return !!vsi->xdp_prog;
|
|
|
|
}
|
|
|
|
|
2021-08-19 19:59:58 +08:00
|
|
|
static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
|
2019-11-05 01:38:56 +08:00
|
|
|
{
|
|
|
|
ring->flags |= ICE_TX_FLAGS_RING_XDP;
|
|
|
|
}
|
|
|
|
|
2019-11-05 01:38:56 +08:00
|
|
|
/**
|
2020-08-28 16:26:15 +08:00
|
|
|
* ice_xsk_pool - get XSK buffer pool bound to a ring
|
2021-08-19 19:59:58 +08:00
|
|
|
* @ring: Rx ring to use
|
2019-11-05 01:38:56 +08:00
|
|
|
*
|
2020-08-28 16:26:15 +08:00
|
|
|
* Returns a pointer to xdp_umem structure if there is a buffer pool present,
|
2019-11-05 01:38:56 +08:00
|
|
|
* NULL otherwise.
|
|
|
|
*/
|
2021-08-19 19:59:58 +08:00
|
|
|
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
|
2019-11-05 01:38:56 +08:00
|
|
|
{
|
2021-04-28 03:52:09 +08:00
|
|
|
struct ice_vsi *vsi = ring->vsi;
|
2019-12-12 19:13:06 +08:00
|
|
|
u16 qid = ring->q_index;
|
2019-11-05 01:38:56 +08:00
|
|
|
|
2021-08-19 19:59:58 +08:00
|
|
|
if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return xsk_get_pool_from_qid(vsi->netdev, qid);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ice_tx_xsk_pool - get XSK buffer pool bound to a ring
|
|
|
|
* @ring: Tx ring to use
|
|
|
|
*
|
|
|
|
* Returns a pointer to xdp_umem structure if there is a buffer pool present,
|
|
|
|
* NULL otherwise. Tx equivalent of ice_xsk_pool.
|
|
|
|
*/
|
|
|
|
static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring)
|
|
|
|
{
|
|
|
|
struct ice_vsi *vsi = ring->vsi;
|
|
|
|
u16 qid;
|
|
|
|
|
|
|
|
qid = ring->q_index - vsi->num_xdp_txq;
|
2019-11-05 01:38:56 +08:00
|
|
|
|
2021-04-28 03:52:09 +08:00
|
|
|
if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
|
2019-11-05 01:38:56 +08:00
|
|
|
return NULL;
|
|
|
|
|
2021-04-28 03:52:09 +08:00
|
|
|
return xsk_get_pool_from_qid(vsi->netdev, qid);
|
2019-11-05 01:38:56 +08:00
|
|
|
}
|
|
|
|
|
2019-03-01 07:26:01 +08:00
|
|
|
/**
|
2019-08-08 22:39:33 +08:00
|
|
|
* ice_get_main_vsi - Get the PF VSI
|
|
|
|
* @pf: PF instance
|
|
|
|
*
|
|
|
|
* returns pf->vsi[0], which by definition is the PF VSI
|
2019-03-01 07:26:01 +08:00
|
|
|
*/
|
2019-08-08 22:39:33 +08:00
|
|
|
static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
|
2019-03-01 07:26:01 +08:00
|
|
|
{
|
2019-08-08 22:39:33 +08:00
|
|
|
if (pf->vsi)
|
|
|
|
return pf->vsi[0];
|
2019-03-01 07:26:01 +08:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-08-20 08:08:59 +08:00
|
|
|
/**
|
|
|
|
* ice_get_netdev_priv_vsi - return VSI associated with netdev priv.
|
|
|
|
* @np: private netdev structure
|
|
|
|
*/
|
|
|
|
static inline struct ice_vsi *ice_get_netdev_priv_vsi(struct ice_netdev_priv *np)
|
|
|
|
{
|
|
|
|
/* In case of port representor return source port VSI. */
|
|
|
|
if (np->repr)
|
|
|
|
return np->repr->src_vsi;
|
|
|
|
else
|
|
|
|
return np->vsi;
|
|
|
|
}
|
|
|
|
|
2020-05-12 09:01:40 +08:00
|
|
|
/**
|
|
|
|
* ice_get_ctrl_vsi - Get the control VSI
|
|
|
|
* @pf: PF instance
|
|
|
|
*/
|
|
|
|
static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
|
|
|
|
{
|
|
|
|
/* if pf->ctrl_vsi_idx is ICE_NO_VSI, control VSI was not set up */
|
|
|
|
if (!pf->vsi || pf->ctrl_vsi_idx == ICE_NO_VSI)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return pf->vsi[pf->ctrl_vsi_idx];
|
|
|
|
}
|
|
|
|
|
2021-08-20 08:08:54 +08:00
|
|
|
/**
|
|
|
|
* ice_is_switchdev_running - check if switchdev is configured
|
|
|
|
* @pf: pointer to PF structure
|
|
|
|
*
|
|
|
|
* Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV
|
|
|
|
* and switchdev is configured, false otherwise.
|
|
|
|
*/
|
|
|
|
static inline bool ice_is_switchdev_running(struct ice_pf *pf)
|
|
|
|
{
|
|
|
|
return pf->switchdev.is_running;
|
|
|
|
}
|
|
|
|
|
2020-11-21 08:39:26 +08:00
|
|
|
/**
|
|
|
|
* ice_set_sriov_cap - enable SRIOV in PF flags
|
|
|
|
* @pf: PF struct
|
|
|
|
*/
|
|
|
|
static inline void ice_set_sriov_cap(struct ice_pf *pf)
|
|
|
|
{
|
|
|
|
if (pf->hw.func_caps.common_cap.sr_iov_1_1)
|
|
|
|
set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ice_clear_sriov_cap - disable SRIOV in PF flags
|
|
|
|
* @pf: PF struct
|
|
|
|
*/
|
|
|
|
static inline void ice_clear_sriov_cap(struct ice_pf *pf)
|
|
|
|
{
|
|
|
|
clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
|
|
|
|
}
|
|
|
|
|
2020-05-12 09:01:41 +08:00
|
|
|
/* Statistics counter index helpers (FD presumably stands for Flow Director -
 * TODO confirm). Each base index addresses a block of
 * ICE_FD_STAT_CTR_BLOCK_COUNT counters; the channel counter sits
 * ICE_FD_STAT_CH entries past the start of that block.
 */
#define ICE_FD_STAT_CTR_BLOCK_COUNT	256
#define ICE_FD_STAT_PF_IDX(base_idx)	((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
#define ICE_FD_SB_STAT_IDX(base_idx)	ICE_FD_STAT_PF_IDX(base_idx)
#define ICE_FD_STAT_CH			1
#define ICE_FD_CH_STAT_IDX(base_idx)	(ICE_FD_STAT_PF_IDX(base_idx) + ICE_FD_STAT_CH)
|
2020-05-12 09:01:41 +08:00
|
|
|
|
2021-10-16 07:35:15 +08:00
|
|
|
/**
|
|
|
|
* ice_is_adq_active - any active ADQs
|
|
|
|
* @pf: pointer to PF
|
|
|
|
*
|
|
|
|
* This function returns true if there are any ADQs configured (which is
|
|
|
|
* determined by looking at VSI type (which should be VSI_PF), numtc, and
|
|
|
|
* TC_MQPRIO flag) otherwise return false
|
|
|
|
*/
|
|
|
|
static inline bool ice_is_adq_active(struct ice_pf *pf)
|
|
|
|
{
|
|
|
|
struct ice_vsi *vsi;
|
|
|
|
|
|
|
|
vsi = ice_get_main_vsi(pf);
|
|
|
|
if (!vsi)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* is ADQ configured */
|
|
|
|
if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC &&
|
|
|
|
test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-11-21 08:39:26 +08:00
|
|
|
bool netif_is_ice(struct net_device *dev);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
|
|
|
|
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
|
2020-05-12 09:01:40 +08:00
|
|
|
int ice_vsi_open_ctrl(struct ice_vsi *vsi);
|
2021-08-20 08:08:54 +08:00
|
|
|
int ice_vsi_open(struct ice_vsi *vsi);
|
2018-03-20 22:58:16 +08:00
|
|
|
void ice_set_ethtool_ops(struct net_device *netdev);
|
2021-08-20 08:08:59 +08:00
|
|
|
void ice_set_ethtool_repr_ops(struct net_device *netdev);
|
2019-09-09 21:47:46 +08:00
|
|
|
void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
|
2019-09-03 16:31:06 +08:00
|
|
|
u16 ice_get_avail_txq_count(struct ice_pf *pf);
|
|
|
|
u16 ice_get_avail_rxq_count(struct ice_pf *pf);
|
2019-11-08 22:23:29 +08:00
|
|
|
int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
|
2019-07-25 17:53:50 +08:00
|
|
|
void ice_update_vsi_stats(struct ice_vsi *vsi);
|
|
|
|
void ice_update_pf_stats(struct ice_pf *pf);
|
2018-03-20 22:58:16 +08:00
|
|
|
int ice_up(struct ice_vsi *vsi);
|
|
|
|
int ice_down(struct ice_vsi *vsi);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_vsi_cfg(struct ice_vsi *vsi);
|
|
|
|
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
|
2021-08-19 20:00:03 +08:00
|
|
|
int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
|
2019-11-05 01:38:56 +08:00
|
|
|
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
|
|
|
|
int ice_destroy_xdp_rings(struct ice_vsi *vsi);
|
|
|
|
int
|
|
|
|
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
|
|
|
|
u32 flags);
|
2021-03-03 02:15:36 +08:00
|
|
|
int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
|
|
|
|
int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
|
|
|
|
int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
|
|
|
|
int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
|
2018-03-20 22:58:15 +08:00
|
|
|
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
|
2019-11-08 22:23:29 +08:00
|
|
|
int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
|
2018-03-20 22:58:16 +08:00
|
|
|
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
|
2021-05-20 22:37:51 +08:00
|
|
|
int ice_plug_aux_dev(struct ice_pf *pf);
|
|
|
|
void ice_unplug_aux_dev(struct ice_pf *pf);
|
2021-05-20 22:37:49 +08:00
|
|
|
int ice_init_rdma(struct ice_pf *pf);
|
2020-05-08 08:41:04 +08:00
|
|
|
const char *ice_aq_str(enum ice_aq_err aq_err);
|
2021-02-27 05:19:30 +08:00
|
|
|
bool ice_is_wol_supported(struct ice_hw *hw);
|
2021-12-30 02:54:33 +08:00
|
|
|
void ice_fdir_del_all_fltrs(struct ice_vsi *vsi);
|
2020-05-12 09:01:46 +08:00
|
|
|
int
|
|
|
|
ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
|
|
|
|
bool is_tun);
|
2020-05-12 09:01:40 +08:00
|
|
|
void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
|
2020-05-12 09:01:42 +08:00
|
|
|
int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
|
|
|
|
int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
|
2020-05-12 09:01:41 +08:00
|
|
|
int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
|
|
|
|
int
|
|
|
|
ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
|
|
|
|
u32 *rule_locs);
|
2021-12-30 02:54:33 +08:00
|
|
|
void ice_fdir_rem_adq_chnl(struct ice_hw *hw, u16 vsi_idx);
|
2020-05-12 09:01:40 +08:00
|
|
|
void ice_fdir_release_flows(struct ice_hw *hw);
|
2020-05-12 09:01:45 +08:00
|
|
|
void ice_fdir_replay_flows(struct ice_hw *hw);
|
|
|
|
void ice_fdir_replay_fltrs(struct ice_pf *pf);
|
2020-05-12 09:01:40 +08:00
|
|
|
int ice_fdir_create_dflt_rules(struct ice_pf *pf);
|
ice: implement device flash update via devlink
Use the newly added pldmfw library to implement device flash update for
the Intel ice networking device driver. This support uses the devlink
flash update interface.
The main parts of the flash include the Option ROM, the netlist module,
and the main NVM data. The PLDM firmware file contains modules for each
of these components.
Using the pldmfw library, the provided firmware file will be scanned for
the three major components, "fw.undi" for the Option ROM, "fw.mgmt" for
the main NVM module containing the primary device firmware, and
"fw.netlist" containing the netlist module.
The flash is separated into two banks, the active bank containing the
running firmware, and the inactive bank which we use for update. Each
module is updated in a staged process. First, the inactive bank is
erased, preparing the device for update. Second, the contents of the
component are copied to the inactive portion of the flash. After all
components are updated, the driver signals the device to switch the
active bank during the next EMP reset (which would usually occur during
the next reboot).
Although the firmware AdminQ interface does report an immediate status
for each command, the NVM erase and NVM write commands receive status
asynchronously. The driver must not continue writing until previous
erase and write commands have finished. The real status of the NVM
commands is returned over the receive AdminQ. Implement a simple
interface that uses a wait queue so that the main update thread can
sleep until the completion status is reported by firmware. For erasing
the inactive banks, this can take quite a while in practice.
To help visualize the process to the devlink application and other
applications based on the devlink netlink interface, status is reported
via the devlink_flash_update_status_notify. While we do report status
after each 4k block when writing, there is no real status we can report
during erasing. We simply must wait for the complete module erasure to
finish.
With this implementation, basic flash update for the ice hardware is
supported.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-24 08:22:03 +08:00
|
|
|
int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
|
|
|
|
struct ice_rq_event_info *event);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_open(struct net_device *netdev);
|
2021-02-27 05:19:26 +08:00
|
|
|
int ice_open_internal(struct net_device *netdev);
|
2019-04-17 01:30:43 +08:00
|
|
|
int ice_stop(struct net_device *netdev);
|
2020-05-12 09:01:46 +08:00
|
|
|
void ice_service_task_schedule(struct ice_pf *pf);
|
2018-03-20 22:58:15 +08:00
|
|
|
|
2021-05-20 22:37:49 +08:00
|
|
|
/**
|
|
|
|
* ice_set_rdma_cap - enable RDMA support
|
|
|
|
* @pf: PF struct
|
|
|
|
*/
|
|
|
|
static inline void ice_set_rdma_cap(struct ice_pf *pf)
|
|
|
|
{
|
2021-05-20 22:37:51 +08:00
|
|
|
if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
|
2021-05-20 22:37:49 +08:00
|
|
|
set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
|
2021-09-09 23:12:23 +08:00
|
|
|
set_bit(ICE_FLAG_AUX_ENA, pf->flags);
|
2021-05-20 22:37:51 +08:00
|
|
|
ice_plug_aux_dev(pf);
|
|
|
|
}
|
2021-05-20 22:37:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ice_clear_rdma_cap - disable RDMA support
|
|
|
|
* @pf: PF struct
|
|
|
|
*/
|
|
|
|
static inline void ice_clear_rdma_cap(struct ice_pf *pf)
|
|
|
|
{
|
2021-05-20 22:37:51 +08:00
|
|
|
ice_unplug_aux_dev(pf);
|
2021-05-20 22:37:49 +08:00
|
|
|
clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
|
2021-09-09 23:12:23 +08:00
|
|
|
clear_bit(ICE_FLAG_AUX_ENA, pf->flags);
|
2021-05-20 22:37:49 +08:00
|
|
|
}
|
2018-03-20 22:58:05 +08:00
|
|
|
#endif /* _ICE_H_ */
|